Parquet Schema Parser/Loader

0.1. Example Usage

require "parquet"
local parser = require "lpeg.parquet"

local example_schema = [=[
message Document {
  required int64 DocId;
  optional group Links {
    repeated int64 Backward;
    repeated int64 Forward;
  }
  repeated group Name {
    repeated group Language {
      required binary Code;
      optional binary Country;
    }
    optional binary Url;
  }
}
]=]

local r1 = {
    DocId = 10,
    Links = {Forward = {20, 40, 60}},
    Name = {
        {
            Language = {
                {Code = "en-us", Country = "us"},
                {Code = "en"}
            },
            Url = "http://A"
        },
        {
            Url = "http://B"
        },
        {
            Language = {Code = "en-gb", Country = "gb"}
        }
    }
}

local r2 = {
    DocId = 20,
    Links = {Backward = {10, 30}, Forward = 80},
    Name = {Url = "http://C"}
}

local doc = parser.load_parquet_schema(example_schema)
local writer = parquet.writer("example.parquet", doc)
writer:dissect_record(r1)
writer:dissect_record(r2)
writer:close()

0.2. Example Schema with Additional Attributes

message one_of_each {
  required boolean b;
  required int32 i32;
  required int32 i32ps (DECIMAL(3,2));
  required int64 i64;
  required int96 i96;
  required float f;
  required double d;
  required binary ba = 8;
  required fixed_len_byte_array(5) flba;
}

1. Functions

1.1. load_parquet_schema

Constructs a Parquet schema from a Parquet schema specification string.

Arguments

  • spec (string) Parquet schema spec
  • hive_compatible (bool, nil/none default: false) - column naming convention
  • metadata_group (string, nil/none) - top level group containing Heka message header/field names (Heka sandbox only)
  • metadata_prefix (string, nil/none) - top level prefix identifying Heka message header/field names as metadata (Heka sandbox only)

Return

  • schema (userdata) - the constructed schema; an error is thrown on failure
  • load_metadata (function/nil) Function to add metadata into the record (Heka sandbox only)

source code: parquet.lua

results matching ""

    No results matching ""