// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

include "Schema.fbs";
include "SparseTensor.fbs";
include "Tensor.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a collection of
/// equal-length Arrow arrays)

/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs.
///
/// RecordBatch.nodes is a vector of these structs, laid out in pre-order over
/// the flattened logical schema.
struct FieldNode {
  /// The number of value slots in the Arrow array at this level of a nested
  /// tree.
  length: long;

  /// The number of observed nulls. Fields with null_count == 0 may choose not
  /// to write their physical validity bitmap out as a materialized buffer,
  /// instead setting the length of the bitmap buffer to 0.
  null_count: long;
}

/// Codecs that can be named by BodyCompression.codec.
enum CompressionType : byte {
  /// LZ4 frame format, chosen for portability, as provided by lz4frame.h or
  /// wrappers thereof. Not to be confused with the "raw" (also called
  /// "block") format provided by lz4.h.
  LZ4_FRAME = 0,

  /// Zstandard.
  ZSTD = 1
}

/// How compression is applied to the IPC message body. This enum exists for
/// forward compatibility, in case strategies other than buffer-level
/// compression (such as whole-body compression) need to be supported in the
/// future.
enum BodyCompressionMethod : byte {
  /// Buffer-level compression. Each constituent buffer is first compressed
  /// with the indicated compressor, and then written as:
  ///
  ///   1. the uncompressed length, in the first 8 bytes, as a 64-bit
  ///      little-endian signed integer;
  ///   2. the compressed buffer bytes;
  ///   3. padding as required by the protocol.
  ///
  /// The uncompressed length may be set to -1 to indicate that the data that
  /// follows is not compressed, which can be useful for cases where
  /// compression does not yield appreciable savings.
  BUFFER = 0
}

/// Optional compression for the memory buffers constituting IPC message
/// bodies. Intended for use with RecordBatch, but could be used for other
/// message types.
///
/// RecordBatch.compression is the only field in this file that references
/// this table.
table BodyCompression {
  /// Compressor library used for the buffers. Defaults to LZ4_FRAME.
  codec: CompressionType = LZ4_FRAME;

  /// Indicates the way the record batch body was compressed. Defaults to
  /// BUFFER, which is currently the only declared method.
  method: BodyCompressionMethod = BUFFER;
}

/// A data header describing the shared memory layout of a "record" or "row"
/// batch. Some systems call this a "row batch" internally and others a "record
/// batch".
table RecordBatch {
  /// Number of records / rows. The arrays in the batch should all have this
  /// length.
  length: long;

  /// Nodes correspond to the pre-ordered flattened logical schema. Each
  /// FieldNode carries the length and null count for one level of the nested
  /// type tree.
  nodes: [FieldNode];

  /// Buffers correspond to the pre-ordered flattened buffer tree.
  ///
  /// The number of buffers appended to this list depends on the schema. For
  /// example, most primitive arrays will have 2 buffers, 1 for the validity
  /// bitmap and 1 for the values. For struct arrays, there will only be a
  /// single buffer for the validity (nulls) bitmap.
  ///
  /// NOTE(review): `Buffer` is not declared in this file; presumably it comes
  /// from one of the included schemas -- confirm.
  buffers: [Buffer];

  /// Optional compression of the message body. See BodyCompression for the
  /// codec and method that can be specified.
  compression: BodyCompression;
}

/// For sending dictionary encoding information. Any Field can be
/// dictionary-encoded, but in this case none of its children may be
/// dictionary-encoded.
///
/// There is one vector / column per dictionary, but that vector / column
/// may be spread across multiple dictionary batches by using the isDelta
/// flag.

table DictionaryBatch {
  /// Id of the dictionary that this batch either appends to or replaces,
  /// depending on isDelta.
  id: long;

  /// The dictionary values carried by this batch, described as a RecordBatch.
  data: RecordBatch;

  /// If isDelta is true the values in the dictionary are to be appended to a
  /// dictionary with the indicated id. If isDelta is false this dictionary
  /// should replace the existing dictionary. Defaults to false.
  isDelta: bool = false;
}

/// ----------------------------------------------------------------------
/// The root Message type

/// The kinds of header a Message can carry in its `header` field. Using a
/// union lets different message types be sent without redundant storage, and
/// leaves room for adding new message types in the future.
///
/// Arrow implementations do not need to implement all of the message types,
/// which may include experimental metadata types. For maximum compatibility,
/// it is best to send data using RecordBatch.
union MessageHeader {
  Schema,
  DictionaryBatch,
  RecordBatch,
  Tensor,
  SparseTensor
}

/// The root table of this schema: a metadata version, one MessageHeader
/// variant, the length of the message body, and optional custom metadata.
table Message {
  /// Metadata version of this message. NOTE(review): MetadataVersion is not
  /// declared in this file; presumably it comes from the included Schema.fbs
  /// -- confirm.
  version: org.apache.arrow.flatbuf.MetadataVersion;

  /// The message-specific header; one of the types listed in the
  /// MessageHeader union.
  header: MessageHeader;

  /// Length of the message body. NOTE(review): presumably measured in bytes
  /// -- confirm against the IPC format specification.
  bodyLength: long;

  /// Custom metadata attached to this message. NOTE(review): KeyValue is not
  /// declared in this file; presumably it comes from the included Schema.fbs
  /// -- confirm.
  custom_metadata: [ KeyValue ];
}

// Message is the root table for buffers serialized with this schema.
root_type Message;