author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
commit    | e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/format
parent    | Initial commit. (diff)
download  | ceph-upstream/18.2.2.tar.xz, ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/format')
-rw-r--r-- | src/arrow/format/File.fbs         |  52
-rw-r--r-- | src/arrow/format/Flight.proto     | 335
-rw-r--r-- | src/arrow/format/Message.fbs      | 140
-rw-r--r-- | src/arrow/format/README.rst       |  25
-rw-r--r-- | src/arrow/format/Schema.fbs       | 522
-rw-r--r-- | src/arrow/format/SparseTensor.fbs | 228
-rw-r--r-- | src/arrow/format/Tensor.fbs       |  54
7 files changed, 1356 insertions(+), 0 deletions(-)
diff --git a/src/arrow/format/File.fbs b/src/arrow/format/File.fbs new file mode 100644 index 000000000..906d494f2 --- /dev/null +++ b/src/arrow/format/File.fbs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Schema.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Arrow File metadata +/// + +table Footer { + version: org.apache.arrow.flatbuf.MetadataVersion; + + schema: org.apache.arrow.flatbuf.Schema; + + dictionaries: [ Block ]; + + recordBatches: [ Block ]; + + /// User-defined metadata + custom_metadata: [ KeyValue ]; +} + +struct Block { + + /// Index to the start of the RecordBlock (note this is past the Message header) + offset: long; + + /// Length of the metadata + metaDataLength: int; + + /// Length of the data (this is aligned so there can be a gap between this and + /// the metadata). + bodyLength: long; +} + +root_type Footer; diff --git a/src/arrow/format/Flight.proto b/src/arrow/format/Flight.proto new file mode 100644 index 000000000..b291d9dbd --- /dev/null +++ b/src/arrow/format/Flight.proto @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +option java_package = "org.apache.arrow.flight.impl"; +option go_package = "github.com/apache/arrow/go/flight;flight"; +option csharp_namespace = "Apache.Arrow.Flight.Protocol"; + +package arrow.flight.protocol; + +/* + * A flight service is an endpoint for retrieving or storing Arrow data. A + * flight service can expose one or more predefined endpoints that can be + * accessed using the Arrow Flight Protocol. Additionally, a flight service + * can expose a set of actions that are available. + */ +service FlightService { + + /* + * Handshake between client and server. Depending on the server, the + * handshake may be required to determine the token that should be used for + * future operations. 
Both request and response are streams to allow multiple + * round-trips depending on auth mechanism. + */ + rpc Handshake(stream HandshakeRequest) returns (stream HandshakeResponse) {} + + /* + * Get a list of available streams given a particular criteria. Most flight + * services will expose one or more streams that are readily available for + * retrieval. This api allows listing the streams available for + * consumption. A user can also provide a criteria. The criteria can limit + * the subset of streams that can be listed via this interface. Each flight + * service allows its own definition of how to consume criteria. + */ + rpc ListFlights(Criteria) returns (stream FlightInfo) {} + + /* + * For a given FlightDescriptor, get information about how the flight can be + * consumed. This is a useful interface if the consumer of the interface + * already can identify the specific flight to consume. This interface can + * also allow a consumer to generate a flight stream through a specified + * descriptor. For example, a flight descriptor might be something that + * includes a SQL statement or a Pickled Python operation that will be + * executed. In those cases, the descriptor will not be previously available + * within the list of available streams provided by ListFlights but will be + * available for consumption for the duration defined by the specific flight + * service. + */ + rpc GetFlightInfo(FlightDescriptor) returns (FlightInfo) {} + + /* + * For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema + * This is used when a consumer needs the Schema of flight stream. Similar to + * GetFlightInfo this interface may generate a new flight that was not previously + * available in ListFlights. + */ + rpc GetSchema(FlightDescriptor) returns (SchemaResult) {} + + /* + * Retrieve a single stream associated with a particular descriptor + * associated with the referenced ticket. A Flight can be composed of one or + * more streams where each stream can be retrieved using a separate opaque + * ticket that the flight service uses for managing a collection of streams. + */ + rpc DoGet(Ticket) returns (stream FlightData) {} + + /* + * Push a stream to the flight service associated with a particular + * flight stream. This allows a client of a flight service to upload a stream + * of data. Depending on the particular flight service, a client consumer + * could be allowed to upload a single stream per descriptor or an unlimited + * number. In the latter, the service might implement a 'seal' action that + * can be applied to a descriptor once all streams are uploaded. + */ + rpc DoPut(stream FlightData) returns (stream PutResult) {} + + /* + * Open a bidirectional data channel for a given descriptor. This + * allows clients to send and receive arbitrary Arrow data and + * application-specific metadata in a single logical stream. In + * contrast to DoGet/DoPut, this is more suited for clients + * offloading computation (rather than storage) to a Flight service. + */ + rpc DoExchange(stream FlightData) returns (stream FlightData) {} + + /* + * Flight services can support an arbitrary number of simple actions in + * addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut + * operations that are potentially available. DoAction allows a flight client + * to do a specific action against a flight service. An action includes + * opaque request and response objects that are specific to the type action + * being undertaken. 
+ */ + rpc DoAction(Action) returns (stream Result) {} + + /* + * A flight service exposes all of the available action types that it has + * along with descriptions. This allows different flight consumers to + * understand the capabilities of the flight service. + */ + rpc ListActions(Empty) returns (stream ActionType) {} + +} + +/* + * The request that a client provides to a server on handshake. + */ +message HandshakeRequest { + + /* + * A defined protocol version + */ + uint64 protocol_version = 1; + + /* + * Arbitrary auth/handshake info. + */ + bytes payload = 2; +} + +message HandshakeResponse { + + /* + * A defined protocol version + */ + uint64 protocol_version = 1; + + /* + * Arbitrary auth/handshake info. + */ + bytes payload = 2; +} + +/* + * A message for doing simple auth. + */ +message BasicAuth { + string username = 2; + string password = 3; +} + +message Empty {} + +/* + * Describes an available action, including both the name used for execution + * along with a short description of the purpose of the action. + */ +message ActionType { + string type = 1; + string description = 2; +} + +/* + * A service specific expression that can be used to return a limited set + * of available Arrow Flight streams. + */ +message Criteria { + bytes expression = 1; +} + +/* + * An opaque action specific for the service. + */ +message Action { + string type = 1; + bytes body = 2; +} + +/* + * An opaque result returned after executing an action. + */ +message Result { + bytes body = 1; +} + +/* + * Wrap the result of a getSchema call + */ +message SchemaResult { + // schema of the dataset as described in Schema.fbs::Schema. + bytes schema = 1; +} + +/* + * The name or tag for a Flight. May be used as a way to retrieve or generate + * a flight or be used to expose a set of previously defined flights. + */ +message FlightDescriptor { + + /* + * Describes what type of descriptor is defined. + */ + enum DescriptorType { + + // Protobuf pattern, not used. + UNKNOWN = 0; + + /* + * A named path that identifies a dataset. A path is composed of a string + * or list of strings describing a particular dataset. This is conceptually + * similar to a path inside a filesystem. + */ + PATH = 1; + + /* + * An opaque command to generate a dataset. + */ + CMD = 2; + } + + DescriptorType type = 1; + + /* + * Opaque value used to express a command. Should only be defined when + * type = CMD. + */ + bytes cmd = 2; + + /* + * List of strings identifying a particular dataset. Should only be defined + * when type = PATH. + */ + repeated string path = 3; +} + +/* + * The access coordinates for retrieval of a dataset. With a FlightInfo, a + * consumer is able to determine how to retrieve a dataset. + */ +message FlightInfo { + // schema of the dataset as described in Schema.fbs::Schema. + bytes schema = 1; + + /* + * The descriptor associated with this info. + */ + FlightDescriptor flight_descriptor = 2; + + /* + * A list of endpoints associated with the flight. To consume the whole + * flight, all endpoints must be consumed. + */ + repeated FlightEndpoint endpoint = 3; + + // Set these to -1 if unknown. + int64 total_records = 4; + int64 total_bytes = 5; +} + +/* + * A particular stream or split associated with a flight. + */ +message FlightEndpoint { + + /* + * Token used to retrieve this stream. + */ + Ticket ticket = 1; + + /* + * A list of URIs where this ticket can be redeemed. 
If the list is + * empty, the expectation is that the ticket can only be redeemed on the + * current service where the ticket was generated. + */ + repeated Location location = 2; +} + +/* + * A location where a Flight service will accept retrieval of a particular + * stream given a ticket. + */ +message Location { + string uri = 1; +} + +/* + * An opaque identifier that the service can use to retrieve a particular + * portion of a stream. + */ +message Ticket { + bytes ticket = 1; +} + +/* + * A batch of Arrow data as part of a stream of batches. + */ +message FlightData { + + /* + * The descriptor of the data. This is only relevant when a client is + * starting a new DoPut stream. + */ + FlightDescriptor flight_descriptor = 1; + + /* + * Header for message data as described in Message.fbs::Message. + */ + bytes data_header = 2; + + /* + * Application-defined metadata. + */ + bytes app_metadata = 3; + + /* + * The actual batch of Arrow data. Preferably handled with minimal-copies + * coming last in the definition to help with sidecar patterns (it is + * expected that some implementations will fetch this field off the wire + * with specialized code to avoid extra memory copies). + */ + bytes data_body = 1000; +} + +/** + * The response message associated with the submission of a DoPut. + */ +message PutResult { + bytes app_metadata = 1; +} diff --git a/src/arrow/format/Message.fbs b/src/arrow/format/Message.fbs new file mode 100644 index 000000000..f1c18d765 --- /dev/null +++ b/src/arrow/format/Message.fbs @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Schema.fbs"; +include "SparseTensor.fbs"; +include "Tensor.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +struct FieldNode { + /// The number of value slots in the Arrow array at this level of a nested + /// tree + length: long; + + /// The number of observed nulls. Fields with null_count == 0 may choose not + /// to write their physical validity bitmap out as a materialized buffer, + /// instead setting the length of the bitmap buffer to 0. + null_count: long; +} + +enum CompressionType:byte { + // LZ4 frame format, for portability, as provided by lz4frame.h or wrappers + // thereof. 
Not to be confused with "raw" (also called "block") format + // provided by lz4.h + LZ4_FRAME, + + // Zstandard + ZSTD +} + +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future +enum BodyCompressionMethod:byte { + /// Each constituent buffer is first compressed with the indicated + /// compressor, and then written with the uncompressed length in the first 8 + /// bytes as a 64-bit little-endian signed integer followed by the compressed + /// buffer bytes (and then padding as required by the protocol). The + /// uncompressed length may be set to -1 to indicate that the data that + /// follows is not compressed, which can be useful for cases where + /// compression does not yield appreciable savings. + BUFFER +} + +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types +table BodyCompression { + /// Compressor library + codec: CompressionType = LZ4_FRAME; + + /// Indicates the way the record batch body was compressed + method: BodyCompressionMethod = BUFFER; +} + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". +table RecordBatch { + /// number of records / rows. The arrays in the batch should all have this + /// length + length: long; + + /// Nodes correspond to the pre-ordered flattened logical schema + nodes: [FieldNode]; + + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + buffers: [Buffer]; + + /// Optional compression of the message body + compression: BodyCompression; +} + +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag + +table DictionaryBatch { + id: long; + data: RecordBatch; + + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. + isDelta: bool = false; +} + +/// ---------------------------------------------------------------------- +/// The root Message type + +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. 
For maximum compatibility, +/// it is best to send data using RecordBatch +union MessageHeader { + Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor +} + +table Message { + version: org.apache.arrow.flatbuf.MetadataVersion; + header: MessageHeader; + bodyLength: long; + custom_metadata: [ KeyValue ]; +} + +root_type Message; diff --git a/src/arrow/format/README.rst b/src/arrow/format/README.rst new file mode 100644 index 000000000..0eaad49b7 --- /dev/null +++ b/src/arrow/format/README.rst @@ -0,0 +1,25 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Arrow Protocol Files +==================== + +This folder contains binary protocol definitions for the Arrow columnar format +and other parts of the project, like the Flight RPC framework. + +For documentation about the Arrow format, see the `docs/source/format` +directory. diff --git a/src/arrow/format/Schema.fbs b/src/arrow/format/Schema.fbs new file mode 100644 index 000000000..7ee827b5d --- /dev/null +++ b/src/arrow/format/Schema.fbs @@ -0,0 +1,522 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Logical types, vector layouts, and schemas + +/// Format Version History. +/// Version 1.0 - Forward and backwards compatibility guaranteed. +/// Version 1.1 - Add Decimal256 (No format release). +/// Version 1.2 (Pending)- Add Interval MONTH_DAY_NANO + +namespace org.apache.arrow.flatbuf; + +enum MetadataVersion:short { + /// 0.1.0 (October 2016). + V1, + + /// 0.2.0 (February 2017). Non-backwards compatible with V1. + V2, + + /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + V3, + + /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. + V4, + + /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + /// metadata and IPC messages). Implementations are recommended to provide a + /// V4 compatibility mode with V5 format changes disabled. + /// + /// Incompatible changes between V4 and V5: + /// - Union buffer layout has changed. 
In V5, Unions don't have a validity + /// bitmap buffer. + V5, +} + +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. A means of negotiating between a client and server +/// what features a stream is allowed to use. The enums +/// values here are intented to represent higher level +/// features, additional details maybe negotiated +/// with key-value pairs specific to the protocol. +/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. +enum Feature : long { + /// Needed to make flatbuffers happy. + UNUSED = 0, + /// The stream makes use of multiple full dictionaries with the + /// same ID and assumes clients implement dictionary replacement + /// correctly. + DICTIONARY_REPLACEMENT = 1, + /// The stream makes use of compressed bodies as described + /// in Message.fbs. + COMPRESSED_BODY = 2 +} + +/// These are stored in the flatbuffer in the Type union below + +table Null { +} + +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). We used Struct_ here as +/// Struct is a reserved word in Flatbuffers +table Struct_ { +} + +table List { +} + +/// Same as List, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeList { +} + +table FixedSizeList { + /// Number of list items per value + listSize: int; +} + +/// A Map is a logical nested type that is represented as +/// +/// List<entries: Struct<key: K, value: V>> +/// +/// In this layout, the keys and values are each respectively contiguous. We do +/// not constrain the key and value types, so the application is responsible +/// for ensuring that the keys are hashable and unique. Whether the keys are sorted +/// may be set in the metadata for this field. +/// +/// In a field with Map type, the field has a child Struct field, which then +/// has two children: key type and the second the value type. The names of the +/// child fields may be respectively "entries", "key", and "value", but this is +/// not enforced. +/// +/// Map +/// ```text +/// - child[0] entries: Struct +/// - child[0] key: K +/// - child[1] value: V +/// ``` +/// Neither the "entries" field nor the "key" field may be nullable. +/// +/// The metadata is structured so that Arrow systems without special handling +/// for Map can make Map an alias for List. The "layout" attribute for the Map +/// field must have the same contents as a List. +table Map { + /// Set to true if the keys within each value are sorted + keysSorted: bool; +} + +enum UnionMode:short { Sparse, Dense } + +/// A union is a complex type with children in Field +/// By default ids in the type vector refer to the offsets in the children +/// optionally typeIds provides an indirection between the child offset and the type id +/// for each child `typeIds[offset]` is the id used in the type vector +table Union { + mode: UnionMode; + typeIds: [ int ]; // optional, describes typeid of each child. 
+} + +table Int { + bitWidth: int; // restricted to 8, 16, 32, and 64 in v1 + is_signed: bool; +} + +enum Precision:short {HALF, SINGLE, DOUBLE} + +table FloatingPoint { + precision: Precision; +} + +/// Unicode with UTF-8 encoding +table Utf8 { +} + +/// Opaque binary data +table Binary { +} + +/// Same as Utf8, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeUtf8 { +} + +/// Same as Binary, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeBinary { +} + +table FixedSizeBinary { + /// Number of bytes per value + byteWidth: int; +} + +table Bool { +} + +/// Exact decimal value represented as an integer value in two's +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. +table Decimal { + /// Total number of decimal digits + precision: int; + + /// Number of digits after the decimal point "." + scale: int; + + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. + bitWidth: int = 128; +} + +enum DateUnit: short { + DAY, + MILLISECOND +} + +/// Date is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch +table Date { + unit: DateUnit = MILLISECOND; +} + +enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND } + +/// Time is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since midnight, stored in either of four units: seconds, +/// milliseconds, microseconds or nanoseconds. +/// +/// The integer `bitWidth` depends on the `unit` and must be one of the following: +/// * SECOND and MILLISECOND: 32 bits +/// * MICROSECOND and NANOSECOND: 64 bits +/// +/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +/// (exclusive), adjusted for the time unit (for example, up to 86400000 +/// exclusive for the MILLISECOND unit). +/// This definition doesn't allow for leap seconds. Time values from +/// measurements with leap seconds will need to be corrected when ingesting +/// into Arrow (for example by replacing the value 86400 with 86399). +table Time { + unit: TimeUnit = MILLISECOND; + bitWidth: int = 32; +} + +/// Timestamp is a 64-bit signed integer representing an elapsed time since a +/// fixed epoch, stored in either of four units: seconds, milliseconds, +/// microseconds or nanoseconds, and is optionally annotated with a timezone. +/// +/// Timestamp values do not include any leap seconds (in other words, all +/// days are considered 86400 seconds long). +/// +/// Timestamps with a non-empty timezone +/// ------------------------------------ +/// +/// If a Timestamp column has a non-empty timezone value, its epoch is +/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone +/// (the Unix epoch), regardless of the Timestamp's own timezone. +/// +/// Therefore, timestamp values with a non-empty timezone correspond to +/// physical points in time together with some additional information about +/// how the data was obtained and/or how to display it (the timezone). 
+/// +/// For example, the timestamp value 0 with the timezone string "Europe/Paris" +/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the +/// application may prefer to display it as "January 1st 1970, 01h00" in +/// the Europe/Paris timezone (which is the same physical point in time). +/// +/// One consequence is that timestamp values with a non-empty timezone +/// can be compared and ordered directly, since they all share the same +/// well-known point of reference (the Unix epoch). +/// +/// Timestamps with an unset / empty timezone +/// ----------------------------------------- +/// +/// If a Timestamp column has no timezone value, its epoch is +/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. +/// +/// Therefore, timestamp values without a timezone cannot be meaningfully +/// interpreted as physical points in time, but only as calendar / clock +/// indications ("wall clock time") in an unspecified timezone. +/// +/// For example, the timestamp value 0 with an empty timezone string +/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there +/// is not enough information to interpret it as a well-defined physical +/// point in time. +/// +/// One consequence is that timestamp values without a timezone cannot +/// be reliably compared or ordered, since they may have different points of +/// reference. In particular, it is *not* possible to interpret an unset +/// or empty timezone as the same as "UTC". +/// +/// Conversion between timezones +/// ---------------------------- +/// +/// If a Timestamp column has a non-empty timezone, changing the timezone +/// to a different non-empty value is a metadata-only operation: +/// the timestamp values need not change as their point of reference remains +/// the same (the Unix epoch). +/// +/// However, if a Timestamp column has no timezone value, changing it to a +/// non-empty value requires to think about the desired semantics. +/// One possibility is to assume that the original timestamp values are +/// relative to the epoch of the timezone being set; timestamp values should +/// then adjusted to the Unix epoch (for example, changing the timezone from +/// empty to "Europe/Paris" would require converting the timestamp values +/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is +/// nevertheless correct). +/// +/// Guidelines for encoding data from external libraries +/// ---------------------------------------------------- +/// +/// Date & time libraries often have multiple different data types for temporal +/// data. In order to ease interoperability between different implementations the +/// Arrow project has some recommendations for encoding these types into a Timestamp +/// column. +/// +/// An "instant" represents a physical point in time that has no relevant timezone +/// (for example, astronomical data). To encode an instant, use a Timestamp with +/// the timezone string set to "UTC", and make sure the Timestamp values +/// are relative to the UTC epoch (January 1st 1970, midnight). +/// +/// A "zoned date-time" represents a physical point in time annotated with an +/// informative timezone (for example, the timezone in which the data was +/// recorded). To encode a zoned date-time, use a Timestamp with the timezone +/// string set to the name of the timezone, and make sure the Timestamp values +/// are relative to the UTC epoch (January 1st 1970, midnight). 
+/// +/// (There is some ambiguity between an instant and a zoned date-time with the +/// UTC timezone. Both of these are stored the same in Arrow. Typically, +/// this distinction does not matter. If it does, then an application should +/// use custom metadata or an extension type to distinguish between the two cases.) +/// +/// An "offset date-time" represents a physical point in time combined with an +/// explicit offset from UTC. To encode an offset date-time, use a Timestamp +/// with the timezone string set to the numeric timezone offset string +/// (e.g. "+03:00"), and make sure the Timestamp values are relative to +/// the UTC epoch (January 1st 1970, midnight). +/// +/// A "naive date-time" (also called "local date-time" in some libraries) +/// represents a wall clock time combined with a calendar date, but with +/// no indication of how to map this information to a physical point in time. +/// Naive date-times must be handled with care because of this missing +/// information, and also because daylight saving time (DST) may make +/// some values ambiguous or non-existent. A naive date-time may be +/// stored as a struct with Date and Time fields. However, it may also be +/// encoded into a Timestamp column with an empty timezone. The timestamp +/// values should be computed "as if" the timezone of the date-time values +/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would +/// be encoded as timestamp value 0. +table Timestamp { + unit: TimeUnit; + + /// The timezone is an optional string indicating the name of a timezone, + /// one of: + /// + /// * As used in the Olson timezone database (the "tz database" or + /// "tzdata"), such as "America/New_York". + /// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", + /// such as "+07:30". + /// + /// Whether a timezone string is present indicates different semantics about + /// the data (see above). + timezone: string; +} + +enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO} +// A "calendar" interval which models types that don't necessarily +// have a precise duration without the context of a base timestamp (e.g. +// days can differ in length during day light savings time transitions). +// All integers in the types below are stored in the endianness indicated +// by the schema. +// +// YEAR_MONTH - Indicates the number of elapsed whole months, stored as +// 4-byte signed integers. +// DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds), +// stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support +// of this IntervalUnit is not required for full arrow compatibility. +// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds. +// The values are stored contiguously in 16-byte blocks. Months and days are +// encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit +// signed integer. Nanoseconds does not allow for leap seconds. Each field is +// independent (e.g. there is no constraint that nanoseconds have the same +// sign as days or that the quantity of nanoseconds represents less than a +// day's worth of time). +table Interval { + unit: IntervalUnit; +} + +// An absolute length of time unrelated to any calendar artifacts. +// +// For the purposes of Arrow Implementations, adding this value to a Timestamp +// ("t1") naively (i.e. 
simply summing the two number) is acceptable even +// though in some cases the resulting Timestamp (t2) would not account for +// leap-seconds during the elapsed time between "t1" and "t2". Similarly, +// representing the difference between two Unix timestamp is acceptable, but +// would yield a value that is possibly a few seconds off from the true elapsed +// time. +// +// The resolution defaults to millisecond, but can be any of the other +// supported TimeUnit values as with Timestamp and Time types. This type is +// always represented as an 8-byte integer. +table Duration { + unit: TimeUnit = MILLISECOND; +} + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. We can +/// add new logical types to Type without breaking backwards compatibility + +union Type { + Null, + Int, + FloatingPoint, + Binary, + Utf8, + Bool, + Decimal, + Date, + Time, + Timestamp, + Interval, + List, + Struct_, + Union, + FixedSizeBinary, + FixedSizeList, + Map, + Duration, + LargeBinary, + LargeUtf8, + LargeList, +} + +/// ---------------------------------------------------------------------- +/// user defined key value pairs to add custom metadata to arrow +/// key namespacing is the responsibility of the user + +table KeyValue { + key: string; + value: string; +} + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values +enum DictionaryKind : short { DenseArray } +table DictionaryEncoding { + /// The known dictionary id in the application where this data is used. In + /// the file or streaming formats, the dictionary ids are found in the + /// DictionaryBatch messages + id: long; + + /// The dictionary indices are constrained to be non-negative integers. If + /// this field is null, the indices must be signed int32. To maximize + /// cross-language compatibility and performance, implementations are + /// recommended to prefer signed integer types over unsigned integer types + /// and to avoid uint64 indices unless they are required by an application. + indexType: Int; + + /// By default, dictionaries are not ordered, or the order does not have + /// semantic meaning. In some statistical, applications, dictionary-encoding + /// is used to represent ordered categorical data, and we provide a way to + /// preserve that metadata here + isOrdered: bool; + + dictionaryKind: DictionaryKind; +} + +/// ---------------------------------------------------------------------- +/// A field represents a named column in a record / row batch or child of a +/// nested type. + +table Field { + /// Name is not required, in i.e. a List + name: string; + + /// Whether or not this field can contain nulls. Should be true in general. + nullable: bool; + + /// This is the type of the decoded value if the field is dictionary encoded. + type: Type; + + /// Present only if the field is dictionary encoded. + dictionary: DictionaryEncoding; + + /// children apply only to nested data types like Struct, List and Union. For + /// primitive types children will have length 0. 
+ children: [ Field ]; + + /// User-defined metadata + custom_metadata: [ KeyValue ]; +} + +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data + +enum Endianness:short { Little, Big } + +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment +struct Buffer { + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + offset: long; + + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). When building + /// messages using the encapsulated IPC message, padding bytes may be written + /// after a buffer, but such padding bytes do not need to be accounted for in + /// the size here. + length: long; +} + +/// ---------------------------------------------------------------------- +/// A Schema describes the columns in a row batch + +table Schema { + + /// endianness of the buffer + /// it is Little Endian by default + /// if endianness doesn't match the underlying system then the vectors need to be converted + endianness: Endianness=Little; + + fields: [Field]; + // User-defined metadata + custom_metadata: [ KeyValue ]; + + /// Features used in the stream/file. + features : [ Feature ]; +} + +root_type Schema; diff --git a/src/arrow/format/SparseTensor.fbs b/src/arrow/format/SparseTensor.fbs new file mode 100644 index 000000000..a6fd2f9e7 --- /dev/null +++ b/src/arrow/format/SparseTensor.fbs @@ -0,0 +1,228 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors". +/// Arrow implementations in general are not required to implement this type + +include "Tensor.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// EXPERIMENTAL: Data structures for sparse tensors + +/// Coordinate (COO) format of sparse tensor index. +/// +/// COO's index list are represented as a NxM matrix, +/// where N is the number of non-zero values, +/// and M is the number of dimensions of a sparse tensor. +/// +/// indicesBuffer stores the location and size of the data of this indices +/// matrix. The value type and the stride of the indices matrix is +/// specified in indicesType and indicesStrides fields. 
+/// +/// For example, let X be a 2x3x4x5 tensor, and it has the following +/// 6 non-zero values: +/// ```text +/// X[0, 1, 2, 0] := 1 +/// X[1, 1, 2, 3] := 2 +/// X[0, 2, 1, 0] := 3 +/// X[0, 1, 3, 0] := 4 +/// X[0, 1, 2, 1] := 5 +/// X[1, 2, 0, 4] := 6 +/// ``` +/// In COO format, the index matrix of X is the following 4x6 matrix: +/// ```text +/// [[0, 0, 0, 0, 1, 1], +/// [1, 1, 1, 2, 1, 2], +/// [2, 2, 3, 1, 2, 0], +/// [0, 1, 0, 0, 3, 4]] +/// ``` +/// When isCanonical is true, the indices is sorted in lexicographical order +/// (row-major order), and it does not have duplicated entries. Otherwise, +/// the indices may not be sorted, or may have duplicated entries. +table SparseTensorIndexCOO { + /// The type of values in indicesBuffer + indicesType: Int (required); + + /// Non-negative byte offsets to advance one value cell along each dimension + /// If omitted, default to row-major order (C-like). + indicesStrides: [long]; + + /// The location and size of the indices matrix's data + indicesBuffer: Buffer (required); + + /// This flag is true if and only if the indices matrix is sorted in + /// row-major order, and does not have duplicated entries. + /// This sort order is the same as of Tensorflow's SparseTensor, + /// but it is inverse order of SciPy's canonical coo_matrix + /// (SciPy employs column-major order for its coo_matrix). + isCanonical: bool; +} + +enum SparseMatrixCompressedAxis: short { Row, Column } + +/// Compressed Sparse format, that is matrix-specific. +table SparseMatrixIndexCSX { + /// Which axis, row or column, is compressed + compressedAxis: SparseMatrixCompressedAxis; + + /// The type of values in indptrBuffer + indptrType: Int (required); + + /// indptrBuffer stores the location and size of indptr array that + /// represents the range of the rows. + /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + /// The length of this array is 1 + (the number of rows), and the type + /// of index value is long. + /// + /// For example, let X be the following 6x4 matrix: + /// ```text + /// X := [[0, 1, 2, 0], + /// [0, 0, 3, 0], + /// [0, 4, 0, 5], + /// [0, 0, 0, 0], + /// [6, 0, 7, 8], + /// [0, 9, 0, 0]]. + /// ``` + /// The array of non-zero values in X is: + /// ```text + /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. + /// ``` + /// And the indptr of X is: + /// ```text + /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. + /// ``` + indptrBuffer: Buffer (required); + + /// The type of values in indicesBuffer + indicesType: Int (required); + + /// indicesBuffer stores the location and size of the array that + /// contains the column indices of the corresponding non-zero values. + /// The type of index value is long. + /// + /// For example, the indices of the above X is: + /// ```text + /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. + /// ``` + /// Note that the indices are sorted in lexicographical order for each row. + indicesBuffer: Buffer (required); +} + +/// Compressed Sparse Fiber (CSF) sparse tensor index. +table SparseTensorIndexCSF { + /// CSF is a generalization of compressed sparse row (CSR) index. + /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) + /// + /// CSF index recursively compresses each dimension of a tensor into a set + /// of prefix trees. Each path from a root to leaf forms one tensor + /// non-zero index. CSF is implemented with two arrays of buffers and one + /// arrays of integers. 
+ /// + /// For example, let X be a 2x3x4x5 tensor and let it have the following + /// 8 non-zero values: + /// ```text + /// X[0, 0, 0, 1] := 1 + /// X[0, 0, 0, 2] := 2 + /// X[0, 1, 0, 0] := 3 + /// X[0, 1, 0, 2] := 4 + /// X[0, 1, 1, 0] := 5 + /// X[1, 1, 1, 0] := 6 + /// X[1, 1, 1, 1] := 7 + /// X[1, 1, 1, 2] := 8 + /// ``` + /// As a prefix tree this would be represented as: + /// ```text + /// 0 1 + /// / \ | + /// 0 1 1 + /// / / \ | + /// 0 0 1 1 + /// /| /| | /| | + /// 1 2 0 2 0 0 1 2 + /// ``` + /// The type of values in indptrBuffers + indptrType: Int (required); + + /// indptrBuffers stores the sparsity structure. + /// Each two consecutive dimensions in a tensor correspond to a buffer in + /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` + /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in + /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. + /// + /// For example, the indptrBuffers for the above X is: + /// ```text + /// indptrBuffer(X) = [ + /// [0, 2, 3], + /// [0, 1, 3, 4], + /// [0, 2, 4, 5, 8] + /// ]. + /// ``` + indptrBuffers: [Buffer] (required); + + /// The type of values in indicesBuffers + indicesType: Int (required); + + /// indicesBuffers stores values of nodes. + /// Each tensor dimension corresponds to a buffer in indicesBuffers. + /// For example, the indicesBuffers for the above X is: + /// ```text + /// indicesBuffer(X) = [ + /// [0, 1], + /// [0, 1, 1], + /// [0, 0, 1, 1], + /// [1, 2, 0, 2, 0, 0, 1, 2] + /// ]. + /// ``` + indicesBuffers: [Buffer] (required); + + /// axisOrder stores the sequence in which dimensions were traversed to + /// produce the prefix tree. + /// For example, the axisOrder for the above X is: + /// ```text + /// axisOrder(X) = [0, 1, 2, 3]. + /// ``` + axisOrder: [int] (required); +} + +union SparseTensorIndex { + SparseTensorIndexCOO, + SparseMatrixIndexCSX, + SparseTensorIndexCSF +} + +table SparseTensor { + /// The type of data contained in a value cell. + /// Currently only fixed-width value types are supported, + /// no strings or nested types. + type: Type (required); + + /// The dimensions of the tensor, optionally named. + shape: [TensorDim] (required); + + /// The number of non-zero values in a sparse tensor. + non_zero_length: long; + + /// Sparse tensor index + sparseIndex: SparseTensorIndex (required); + + /// The location and size of the tensor's data + data: Buffer (required); +} + +root_type SparseTensor; diff --git a/src/arrow/format/Tensor.fbs b/src/arrow/format/Tensor.fbs new file mode 100644 index 000000000..409297ccf --- /dev/null +++ b/src/arrow/format/Tensor.fbs @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+/// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or
+/// "ndarrays". Arrow implementations in general are not required to implement
+/// this type
+
+include "Schema.fbs";
+
+namespace org.apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// Data structures for dense tensors
+
+/// Shape data for a single axis in a tensor
+table TensorDim {
+  /// Length of dimension
+  size: long;
+
+  /// Name of the dimension, optional
+  name: string;
+}
+
+table Tensor {
+  /// The type of data contained in a value cell. Currently only fixed-width
+  /// value types are supported, no strings or nested types
+  type: Type (required);
+
+  /// The dimensions of the tensor, optionally named
+  shape: [TensorDim] (required);
+
+  /// Non-negative byte offsets to advance one value cell along each dimension
+  /// If omitted, default to row-major order (C-like).
+  strides: [long];
+
+  /// The location and size of the tensor's data
+  data: Buffer (required);
+}
+
+root_type Tensor;
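
Taken together, Schema.fbs, Message.fbs, and File.fbs describe the Arrow IPC file layout: a Schema message, the record batches, and a trailing Footer indexing them. A minimal round-trip sketch, assuming the pyarrow package (which implements these definitions); the file name is illustrative:

```python
import pyarrow as pa
import pyarrow.ipc as ipc

# One int64 column "x"; the schema is serialized via Schema.fbs.
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3], type=pa.int64())],
                                   names=["x"])

with pa.OSFile("example.arrow", "wb") as sink:
    with ipc.new_file(sink, batch.schema) as writer:
        writer.write_batch(batch)  # one RecordBatch message (Message.fbs)
# On close, the writer appends a Footer (File.fbs) whose Block entries
# record the offset, metadata length, and body length of each batch.

with pa.memory_map("example.arrow", "rb") as source:
    table = ipc.open_file(source).read_all()  # parses the Footer first
assert table.num_rows == 3
```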
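BodyCompression in Message.fbs surfaces as an IPC write option; a sketch enabling ZSTD buffer compression (assumes a pyarrow build with zstd support):

```python
import pyarrow as pa
import pyarrow.ipc as ipc

# codec "zstd" maps to CompressionType.ZSTD with BodyCompressionMethod.BUFFER:
# each constituent buffer is compressed and prefixed with its uncompressed
# length as a little-endian int64.
opts = ipc.IpcWriteOptions(compression="zstd")

batch = pa.RecordBatch.from_arrays([pa.array([1.0] * 1000)], names=["v"])
sink = pa.BufferOutputStream()
with ipc.new_stream(sink, batch.schema, options=opts) as writer:
    writer.write_batch(batch)
```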
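The Timestamp semantics spelled out in Schema.fbs are observable directly: values with a non-empty timezone are stored relative to the UTC epoch, so retargeting the timezone is metadata-only. A sketch, again assuming pyarrow:

```python
import pyarrow as pa

# Value 0 with tz "Europe/Paris" is the physical instant 1970-01-01T00:00Z;
# an application may merely *display* it as 01:00 Paris time.
arr = pa.array([0], type=pa.timestamp("ms", tz="Europe/Paris"))

# Casting between non-empty timezones leaves the stored int64 untouched.
utc = arr.cast(pa.timestamp("ms", tz="UTC"))
assert utc[0].value == arr[0].value == 0
```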
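Flight.proto is implemented by pyarrow.flight; a hedged client sketch of the ListFlights/DoGet flow (the server address, and that a Flight server is actually listening there, are assumptions):

```python
import pyarrow.flight as flight

client = flight.connect("grpc://localhost:8815")  # assumed endpoint

for info in client.list_flights():                # ListFlights RPC
    print(info.descriptor, info.total_records)
    # Each FlightEndpoint carries an opaque Ticket redeemable via DoGet.
    for endpoint in info.endpoints:
        reader = client.do_get(endpoint.ticket)   # DoGet streams FlightData
        table = reader.read_all()
```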
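Tensor.fbs round-trips through pyarrow's tensor IPC helpers; a sketch assuming numpy and pyarrow are installed:

```python
import numpy as np
import pyarrow as pa
import pyarrow.ipc as ipc

tensor = pa.Tensor.from_numpy(np.arange(24).reshape(2, 3, 4))

sink = pa.BufferOutputStream()
ipc.write_tensor(tensor, sink)   # Tensor.fbs metadata, then the data buffer
restored = ipc.read_tensor(pa.BufferReader(sink.getvalue()))
assert list(restored.shape) == [2, 3, 4]
```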