// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// NOTE(review): Buffer, KeyValue, MetadataVersion, Schema, Tensor and
// SparseTensor are used below but not declared in this file; they are
// presumably provided by the schemas included here.
include "Schema.fbs";
include "SparseTensor.fbs";
include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a collection of
/// equal-length Arrow arrays)

/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
struct FieldNode {
  /// The number of value slots in the Arrow array at this level of a nested
  /// tree
  length: long;

  /// The number of observed nulls. Fields with null_count == 0 may choose not
  /// to write their physical validity bitmap out as a materialized buffer,
  /// instead setting the length of the bitmap buffer to 0.
  null_count: long;
}

// Codec used to compress message body buffers (see BodyCompression.codec).
// Members have no explicit values, so their numeric values follow
// declaration order; do not reorder them.
enum CompressionType:byte {
  // LZ4 frame format, for portability, as provided by lz4frame.h or wrappers
  // thereof. Not to be confused with "raw" (also called "block") format
  // provided by lz4.h
  LZ4_FRAME,

  // Zstandard
  ZSTD
}

/// Provided for forward compatibility in case we need to support different
/// strategies for compressing the IPC message body (like whole-body
/// compression rather than buffer-level) in the future
enum BodyCompressionMethod:byte {
  /// Each constituent buffer is first compressed with the indicated
  /// compressor, and then written with the uncompressed length in the first 8
  /// bytes as a 64-bit little-endian signed integer followed by the compressed
  /// buffer bytes (and then padding as required by the protocol). The
  /// uncompressed length may be set to -1 to indicate that the data that
  /// follows is not compressed, which can be useful for cases where
  /// compression does not yield appreciable savings.
  BUFFER
}

/// Optional compression for the memory buffers constituting IPC message
/// bodies. Intended for use with RecordBatch but could be used for other
/// message types
table BodyCompression {
  /// Compressor library. Defaults to LZ4_FRAME when the field is not set.
  codec: CompressionType = LZ4_FRAME;

  /// Indicates the way the record batch body was compressed. Defaults to
  /// BUFFER, the only method declared in BodyCompressionMethod above.
  method: BodyCompressionMethod = BUFFER;
}

/// A data header describing the shared memory layout of a "record" or "row"
/// batch. Some systems call this a "row batch" internally and others a "record
/// batch".
table RecordBatch {
  /// number of records / rows. The arrays in the batch should all have this
  /// length
  length: long;

  /// Nodes correspond to the pre-ordered flattened logical schema
  nodes: [FieldNode];

  /// Buffers correspond to the pre-ordered flattened buffer tree
  ///
  /// The number of buffers appended to this list depends on the schema. For
  /// example, most primitive arrays will have 2 buffers, 1 for the validity
  /// bitmap and 1 for the values. For struct arrays, there will only be a
  /// single buffer for the validity (nulls) bitmap
  buffers: [Buffer];

  /// Optional compression of the message body
  compression: BodyCompression;
}

/// For sending dictionary encoding information. Any Field can be
/// dictionary-encoded, but in this case none of its children may be
/// dictionary-encoded.
/// There is one vector / column per dictionary, but that vector / column
/// may be spread across multiple dictionary batches by using the isDelta
/// flag
table DictionaryBatch {
  /// Identifier of the dictionary this batch applies to (the "indicated id"
  /// that isDelta below refers to).
  id: long;

  /// The dictionary values carried by this batch, laid out as a RecordBatch.
  data: RecordBatch;

  /// If isDelta is true the values in the dictionary are to be appended to a
  /// dictionary with the indicated id. If isDelta is false this dictionary
  /// should replace the existing dictionary.
  isDelta: bool = false;
}

/// ----------------------------------------------------------------------
/// The root Message type

/// This union enables us to easily send different message types without
/// redundant storage, and in the future we can easily add new message types.
///
/// Arrow implementations do not need to implement all of the message types,
/// which may include experimental metadata types. For maximum compatibility,
/// it is best to send data using RecordBatch
// The union discriminant follows member order, so new message types should
// only be appended at the end.
union MessageHeader {
  Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor
}

/// Envelope for a single message: a metadata version, one MessageHeader
/// variant, a body length and optional custom metadata.
table Message {
  /// Metadata version of this message. MetadataVersion is declared outside
  /// this file.
  version: org.apache.arrow.flatbuf.MetadataVersion;

  /// The message payload description; exactly one MessageHeader variant.
  header: MessageHeader;

  /// NOTE(review): presumably the size in bytes of the message body that
  /// accompanies this metadata - confirm against the IPC format spec.
  bodyLength: long;

  /// Custom key/value metadata attached to the message. KeyValue is declared
  /// outside this file.
  custom_metadata: [ KeyValue ];
}

// Message is the root table of buffers serialized with this schema.
root_type Message;