summaryrefslogtreecommitdiffstats
path: root/src/arrow/format/Message.fbs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/format/Message.fbs
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. [upstream/18.2.2]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/arrow/format/Message.fbs140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/arrow/format/Message.fbs b/src/arrow/format/Message.fbs
new file mode 100644
index 000000000..f1c18d765
--- /dev/null
+++ b/src/arrow/format/Message.fbs
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "Schema.fbs";
+include "SparseTensor.fbs";
+include "Tensor.fbs";
+
+namespace org.apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// Data structures for describing a table row batch (a collection of
+/// equal-length Arrow arrays)
+
+/// Metadata about a field at some level of a nested type tree (but not
+/// its children).
+///
+/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
+/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+/// null_count: 0} for its Int16 node, as separate FieldNode structs.
+struct FieldNode {
+ /// The number of value slots in the Arrow array at this level of a nested
+ /// tree (null slots are counted too: the List node above has length 5).
+ length: long;
+
+ /// The number of observed nulls. Fields with null_count == 0 may choose not
+ /// to write their physical validity bitmap out as a materialized buffer,
+ /// instead setting the length of the bitmap buffer to 0.
+ null_count: long;
+}
+
+enum CompressionType:byte { // compressor choices for BodyCompression.codec
+ /// LZ4 frame format, for portability, as provided by lz4frame.h or wrappers
+ /// thereof. Not to be confused with "raw" (also called "block") format
+ /// provided by lz4.h.
+ LZ4_FRAME, // implicit value 0
+
+ /// Zstandard
+ ZSTD // implicit value 1
+}
+
+/// Provided for forward compatibility in case we need to support different
+/// strategies for compressing the IPC message body (like whole-body
+/// compression rather than buffer-level) in the future.
+enum BodyCompressionMethod:byte {
+ /// Each constituent buffer is first compressed with the indicated
+ /// compressor, and then written with the uncompressed length in the first 8
+ /// bytes as a 64-bit little-endian signed integer followed by the compressed
+ /// buffer bytes (and then padding as required by the protocol). The
+ /// uncompressed length may be set to -1 to indicate that the data that
+ /// follows is not compressed, which can be useful for cases where
+ /// compression does not yield appreciable savings.
+ BUFFER // implicit value 0; the only method defined in this schema
+}
+
+/// Optional compression for the memory buffers constituting IPC message
+/// bodies. Intended for use with RecordBatch but could be used for other
+/// message types.
+table BodyCompression {
+ /// Compressor library; defaults to LZ4_FRAME when the field is not set
+ codec: CompressionType = LZ4_FRAME;
+
+ /// Indicates the way the record batch body was compressed; defaults to BUFFER
+ method: BodyCompressionMethod = BUFFER;
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+table RecordBatch {
+ /// Number of records / rows. The arrays in the batch should all have this
+ /// length.
+ length: long;
+
+ /// FieldNode entries, corresponding to the pre-ordered flattened logical schema
+ nodes: [FieldNode];
+
+ /// Buffers correspond to the pre-ordered flattened buffer tree.
+ ///
+ /// The number of buffers appended to this list depends on the schema. For
+ /// example, most primitive arrays will have 2 buffers, 1 for the validity
+ /// bitmap and 1 for the values. For struct arrays, there will only be a
+ /// single buffer for the validity (nulls) bitmap.
+ buffers: [Buffer]; // Buffer is declared outside this file (presumably Schema.fbs)
+
+ /// Optional compression of the message body (see BodyCompression)
+ compression: BodyCompression;
+}
+
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
+/// There is one vector / column per dictionary, but that vector / column
+/// may be spread across multiple dictionary batches by using the isDelta
+/// flag.
+
+table DictionaryBatch {
+ id: long; // id of the dictionary this batch replaces or appends to (see isDelta)
+ data: RecordBatch; // the dictionary's values, carried as a RecordBatch
+
+ /// If isDelta is true the values in the dictionary are to be appended to a
+ /// dictionary with the indicated id. If isDelta is false this dictionary
+ /// should replace the existing dictionary.
+ isDelta: bool = false;
+}
+
+/// ----------------------------------------------------------------------
+/// The root Message type
+
+/// This union enables us to easily send different message types without
+/// redundant storage, and in the future we can easily add new message types.
+///
+/// Arrow implementations do not need to implement all of the message types,
+/// which may include experimental metadata types. For maximum compatibility,
+/// it is best to send data using RecordBatch.
+union MessageHeader {
+ Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor // member order fixes each type's union tag; add new types only at the end
+}
+
+table Message { // top-level table of this schema (named by root_type); fields have no explicit ids, so append new ones last
+ version: org.apache.arrow.flatbuf.MetadataVersion; // MetadataVersion is declared outside this file (presumably Schema.fbs)
+ header: MessageHeader; // the one message type carried: Schema, DictionaryBatch, RecordBatch, Tensor or SparseTensor
+ bodyLength: long; // length of the message body -- presumably in bytes; TODO confirm against the IPC spec
+ custom_metadata: [ KeyValue ]; // list of KeyValue entries; KeyValue is declared outside this file (presumably Schema.fbs)
+}
+
+root_type Message; // serialized buffers of this schema have a Message table at their root