// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. /// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors". /// Arrow implementations in general are not required to implement this type include "Tensor.fbs"; namespace org.apache.arrow.flatbuf; /// ---------------------------------------------------------------------- /// EXPERIMENTAL: Data structures for sparse tensors /// Coordinate (COO) format of sparse tensor index. /// /// COO's index list are represented as a NxM matrix, /// where N is the number of non-zero values, /// and M is the number of dimensions of a sparse tensor. /// /// indicesBuffer stores the location and size of the data of this indices /// matrix. The value type and the stride of the indices matrix is /// specified in indicesType and indicesStrides fields. /// /// For example, let X be a 2x3x4x5 tensor, and it has the following /// 6 non-zero values: /// ```text /// X[0, 1, 2, 0] := 1 /// X[1, 1, 2, 3] := 2 /// X[0, 2, 1, 0] := 3 /// X[0, 1, 3, 0] := 4 /// X[0, 1, 2, 1] := 5 /// X[1, 2, 0, 4] := 6 /// ``` /// In COO format, the index matrix of X is the following 4x6 matrix: /// ```text /// [[0, 0, 0, 0, 1, 1], /// [1, 1, 1, 2, 1, 2], /// [2, 2, 3, 1, 2, 0], /// [0, 1, 0, 0, 3, 4]] /// ``` /// When isCanonical is true, the indices is sorted in lexicographical order /// (row-major order), and it does not have duplicated entries. Otherwise, /// the indices may not be sorted, or may have duplicated entries. table SparseTensorIndexCOO { /// The type of values in indicesBuffer indicesType: Int (required); /// Non-negative byte offsets to advance one value cell along each dimension /// If omitted, default to row-major order (C-like). indicesStrides: [long]; /// The location and size of the indices matrix's data indicesBuffer: Buffer (required); /// This flag is true if and only if the indices matrix is sorted in /// row-major order, and does not have duplicated entries. /// This sort order is the same as of Tensorflow's SparseTensor, /// but it is inverse order of SciPy's canonical coo_matrix /// (SciPy employs column-major order for its coo_matrix). isCanonical: bool; } enum SparseMatrixCompressedAxis: short { Row, Column } /// Compressed Sparse format, that is matrix-specific. table SparseMatrixIndexCSX { /// Which axis, row or column, is compressed compressedAxis: SparseMatrixCompressedAxis; /// The type of values in indptrBuffer indptrType: Int (required); /// indptrBuffer stores the location and size of indptr array that /// represents the range of the rows. /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. /// The length of this array is 1 + (the number of rows), and the type /// of index value is long. /// /// For example, let X be the following 6x4 matrix: /// ```text /// X := [[0, 1, 2, 0], /// [0, 0, 3, 0], /// [0, 4, 0, 5], /// [0, 0, 0, 0], /// [6, 0, 7, 8], /// [0, 9, 0, 0]]. /// ``` /// The array of non-zero values in X is: /// ```text /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. /// ``` /// And the indptr of X is: /// ```text /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. /// ``` indptrBuffer: Buffer (required); /// The type of values in indicesBuffer indicesType: Int (required); /// indicesBuffer stores the location and size of the array that /// contains the column indices of the corresponding non-zero values. /// The type of index value is long. /// /// For example, the indices of the above X is: /// ```text /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. /// ``` /// Note that the indices are sorted in lexicographical order for each row. indicesBuffer: Buffer (required); } /// Compressed Sparse Fiber (CSF) sparse tensor index. table SparseTensorIndexCSF { /// CSF is a generalization of compressed sparse row (CSR) index. /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) /// /// CSF index recursively compresses each dimension of a tensor into a set /// of prefix trees. Each path from a root to leaf forms one tensor /// non-zero index. CSF is implemented with two arrays of buffers and one /// arrays of integers. /// /// For example, let X be a 2x3x4x5 tensor and let it have the following /// 8 non-zero values: /// ```text /// X[0, 0, 0, 1] := 1 /// X[0, 0, 0, 2] := 2 /// X[0, 1, 0, 0] := 3 /// X[0, 1, 0, 2] := 4 /// X[0, 1, 1, 0] := 5 /// X[1, 1, 1, 0] := 6 /// X[1, 1, 1, 1] := 7 /// X[1, 1, 1, 2] := 8 /// ``` /// As a prefix tree this would be represented as: /// ```text /// 0 1 /// / \ | /// 0 1 1 /// / / \ | /// 0 0 1 1 /// /| /| | /| | /// 1 2 0 2 0 0 1 2 /// ``` /// The type of values in indptrBuffers indptrType: Int (required); /// indptrBuffers stores the sparsity structure. /// Each two consecutive dimensions in a tensor correspond to a buffer in /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. /// /// For example, the indptrBuffers for the above X is: /// ```text /// indptrBuffer(X) = [ /// [0, 2, 3], /// [0, 1, 3, 4], /// [0, 2, 4, 5, 8] /// ]. /// ``` indptrBuffers: [Buffer] (required); /// The type of values in indicesBuffers indicesType: Int (required); /// indicesBuffers stores values of nodes. /// Each tensor dimension corresponds to a buffer in indicesBuffers. /// For example, the indicesBuffers for the above X is: /// ```text /// indicesBuffer(X) = [ /// [0, 1], /// [0, 1, 1], /// [0, 0, 1, 1], /// [1, 2, 0, 2, 0, 0, 1, 2] /// ]. /// ``` indicesBuffers: [Buffer] (required); /// axisOrder stores the sequence in which dimensions were traversed to /// produce the prefix tree. /// For example, the axisOrder for the above X is: /// ```text /// axisOrder(X) = [0, 1, 2, 3]. /// ``` axisOrder: [int] (required); } union SparseTensorIndex { SparseTensorIndexCOO, SparseMatrixIndexCSX, SparseTensorIndexCSF } table SparseTensor { /// The type of data contained in a value cell. /// Currently only fixed-width value types are supported, /// no strings or nested types. type: Type (required); /// The dimensions of the tensor, optionally named. shape: [TensorDim] (required); /// The number of non-zero values in a sparse tensor. non_zero_length: long; /// Sparse tensor index sparseIndex: SparseTensorIndex (required); /// The location and size of the tensor's data data: Buffer (required); } root_type SparseTensor;