Parquet Schema Parser/Loader
0.1. Example Usage
require "parquet"
local parser = require "lpeg.parquet"
local example_schema = [=[
message Document {
required int64 DocId;
optional group Links {
repeated int64 Backward;
repeated int64 Forward;
}
repeated group Name {
repeated group Language {
required binary Code;
optional binary Country;
}
optional binary Url;
}
}
]=]
local r1 = {
DocId = 10,
Links = {Forward = {20, 40, 60}},
Name = {
{
Language = {
{Code = "en-us", Country = "us"},
{Code = "en"}
},
Url = "http://A"
},
{
Url = "http://B"
},
{
Language = {Code = "en-gb", Country = "gb"}
}
}
}
local r2 = {
DocId = 20,
Links = {Backward = {10, 30}, Forward = 80},
Name = {Url = "http://C"}
}
local doc = parser.load_parquet_schema(example_schema)
local writer = parquet.writer("example.parquet", doc)
writer:dissect_record(r1)
writer:dissect_record(r2)
writer:close()
0.2. Example Schema with Additional Attributes
message one_of_each {
required boolean b;
required int32 i32;
required int32 i32ps (DECIMAL(3,2));
required int64 i64;
required int96 i96;
required float f;
required double d;
required binary ba = 8;
required fixed_len_byte_array(5) flba;
}
1. Functions
1.1. load_parquet_schema
Constructs a Parquet schema from the Parquet schema specification.
Arguments
- spec (string) Parquet schema spec
- hive_compatible (bool, nil/none default: false) - column naming convention
- metadata_group (string, nil/none) - top level group containing Heka message header/field names (Heka sandbox only)
- metadata_prefix (string, nil/none) - top level prefix identifying Heka message header/field names as metadata (Heka sandbox only)
Return
- schema (userdata) or an error is thrown
- load_metadata (function/nil) Function to add metadata into the record (Heka sandbox only)
source code: parquet.lua