Mozilla Telemetry Parquet Schema Documentation
Generates parquet schema documentation for each docType in the data stream.
1. Sample Configuration
filename = "moz_telemetry_parquet_schema.lua"
message_matcher = "Uuid < '\003' && Fields[docType] != NIL" -- slightly greater than a 1% sample
ticker_interval = 60
preserve_data = false
2. Sample Output
Hierarchy:
- msg.Type
- msg.Fields[docType]
- msg.Fields[sourceVersion]
- parquet message schema
The number in brackets is the number of occurrences of each dimension value in the sample.
telemetry.duplicate [1415]
first-shutdown [12]
-no version- [12]
message schema {
required binary Logger (UTF8);
required fixed_len_byte_array(16) Uuid;
optional int32 Pid;
optional int32 Severity;
optional binary EnvVersion (UTF8);
required binary Hostname (UTF8);
required int64 Timestamp;
optional binary Payload (UTF8);
required binary Type (UTF8);
required group Fields {
optional binary geoSubdivision1 (UTF8);
required binary appUpdateChannel (UTF8);
required binary documentId (UTF8);
required binary docType (UTF8);
required int64 duplicateDelta;
required binary geoCountry (UTF8);
required binary geoCity (UTF8);
required binary appVersion (UTF8);
required binary appBuildId (UTF8);
required binary appName (UTF8);
}
}
source code: moz_telemetry_parquet_schema.lua