# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. cdef class Tensor(_Weakrefable): """ A n-dimensional array a.k.a Tensor. """ def __init__(self): raise TypeError("Do not call Tensor's constructor directly, use one " "of the `pyarrow.Tensor.from_*` functions instead.") cdef void init(self, const shared_ptr[CTensor]& sp_tensor): self.sp_tensor = sp_tensor self.tp = sp_tensor.get() self.type = pyarrow_wrap_data_type(self.tp.type()) def __repr__(self): return """ type: {0.type} shape: {0.shape} strides: {0.strides}""".format(self) @staticmethod def from_numpy(obj, dim_names=None): """ Create a Tensor from a numpy array. Parameters ---------- obj : numpy.ndarray The source numpy array dim_names : list, optional Names of each dimension of the Tensor. """ cdef: vector[c_string] c_dim_names shared_ptr[CTensor] ctensor if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) check_status(NdarrayToTensor(c_default_memory_pool(), obj, c_dim_names, &ctensor)) return pyarrow_wrap_tensor(ctensor) def to_numpy(self): """ Convert arrow::Tensor to numpy.ndarray with zero copy """ cdef PyObject* out check_status(TensorToNdarray(self.sp_tensor, self, &out)) return PyObject_to_object(out) def equals(self, Tensor other): """ Return true if the tensors contains exactly equal data """ return self.tp.Equals(deref(other.tp)) def __eq__(self, other): if isinstance(other, Tensor): return self.equals(other) else: return NotImplemented def dim_name(self, i): return frombytes(self.tp.dim_name(i)) @property def dim_names(self): return [frombytes(x) for x in tuple(self.tp.dim_names())] @property def is_mutable(self): return self.tp.is_mutable() @property def is_contiguous(self): return self.tp.is_contiguous() @property def ndim(self): return self.tp.ndim() @property def size(self): return self.tp.size() @property def shape(self): # Cython knows how to convert a vector[T] to a Python list return tuple(self.tp.shape()) @property def strides(self): return tuple(self.tp.strides()) def __getbuffer__(self, cp.Py_buffer* buffer, int flags): buffer.buf = self.tp.data().get().data() pep3118_format = self.type.pep3118_format if pep3118_format is None: raise NotImplementedError("type %s not supported for buffer " "protocol" % (self.type,)) buffer.format = pep3118_format buffer.itemsize = self.type.bit_width // 8 buffer.internal = NULL buffer.len = self.tp.size() * buffer.itemsize buffer.ndim = self.tp.ndim() buffer.obj = self if self.tp.is_mutable(): buffer.readonly = 0 else: buffer.readonly = 1 # NOTE: This assumes Py_ssize_t == int64_t, and that the shape # and strides arrays lifetime is tied to the tensor's buffer.shape = &self.tp.shape()[0] buffer.strides = &self.tp.strides()[0] buffer.suboffsets = NULL ctypedef CSparseCOOIndex* _CSparseCOOIndexPtr cdef class SparseCOOTensor(_Weakrefable): """ A sparse COO tensor. """ def __init__(self): raise TypeError("Do not call SparseCOOTensor's constructor directly, " "use one of the `pyarrow.SparseCOOTensor.from_*` " "functions instead.") cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor): self.sp_sparse_tensor = sp_sparse_tensor self.stp = sp_sparse_tensor.get() self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): return """ type: {0.type} shape: {0.shape}""".format(self) @classmethod def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCOOTensor """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @staticmethod def from_numpy(data, coords, shape, dim_names=None): """ Create arrow::SparseCOOTensor from numpy.ndarrays Parameters ---------- data : numpy.ndarray Data used to populate the rows. coords : numpy.ndarray Coordinates of the data. shape : tuple Shape of the tensor. dim_names : list, optional Names of the dimensions. """ cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce precondition for SparseCOOTensor indices coords = np.require(coords, dtype='i8', requirements='C') if coords.ndim != 2: raise ValueError("Expected 2-dimensional array for " "SparseCOOTensor indices") check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(), data, coords, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_coo_tensor(csparse_tensor) @staticmethod def from_scipy(obj, dim_names=None): """ Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor Parameters ---------- obj : scipy.sparse.csr_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse if not isinstance(obj, scipy.sparse.coo_matrix): raise TypeError( "Expected scipy.sparse.coo_matrix, got {}".format(type(obj))) cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in obj.shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) row = obj.row col = obj.col # When SciPy's coo_matrix has canonical format, its indices matrix is # sorted in column-major order. As Arrow's SparseCOOIndex is sorted # in row-major order if it is canonical, we must sort indices matrix # into row-major order to keep its canonicalness, here. if obj.has_canonical_format: order = np.lexsort((col, row)) # sort in row-major order row = row[order] col = col[order] coords = np.vstack([row, col]).T coords = np.require(coords, dtype='i8', requirements='C') check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(), obj.data, coords, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_coo_tensor(csparse_tensor) @staticmethod def from_pydata_sparse(obj, dim_names=None): """ Convert pydata/sparse.COO to arrow::SparseCOOTensor. Parameters ---------- obj : pydata.sparse.COO The sparse multidimensional array that should be converted. dim_names : list, optional Names of the dimensions. """ import sparse if not isinstance(obj, sparse.COO): raise TypeError( "Expected sparse.COO, got {}".format(type(obj))) cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in obj.shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) coords = np.require(obj.coords.T, dtype='i8', requirements='C') check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(), obj.data, coords, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_coo_tensor(csparse_tensor) @staticmethod def from_tensor(obj): """ Convert arrow::Tensor to arrow::SparseCOOTensor. Parameters ---------- obj : Tensor The tensor that should be converted. """ cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj) with nogil: check_status(TensorToSparseCOOTensor(ctensor, &csparse_tensor)) return pyarrow_wrap_sparse_coo_tensor(csparse_tensor) def to_numpy(self): """ Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy. """ cdef PyObject* out_data cdef PyObject* out_coords check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self, &out_data, &out_coords)) return PyObject_to_object(out_data), PyObject_to_object(out_coords) def to_scipy(self): """ Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix. """ from scipy.sparse import coo_matrix cdef PyObject* out_data cdef PyObject* out_coords check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self, &out_data, &out_coords)) data = PyObject_to_object(out_data) coords = PyObject_to_object(out_coords) row, col = coords[:, 0], coords[:, 1] result = coo_matrix((data[:, 0], (row, col)), shape=self.shape) # As the description in from_scipy above, we sorted indices matrix # in row-major order if SciPy's coo_matrix has canonical format. # So, we must call sum_duplicates() to make the result coo_matrix # has canonical format. if self.has_canonical_format: result.sum_duplicates() return result def to_pydata_sparse(self): """ Convert arrow::SparseCOOTensor to pydata/sparse.COO. """ from sparse import COO cdef PyObject* out_data cdef PyObject* out_coords check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self, &out_data, &out_coords)) data = PyObject_to_object(out_data) coords = PyObject_to_object(out_coords) result = COO(data=data[:, 0], coords=coords.T, shape=self.shape) return result def to_tensor(self): """ Convert arrow::SparseCOOTensor to arrow::Tensor. """ cdef shared_ptr[CTensor] ctensor with nogil: ctensor = GetResultValue(self.stp.ToTensor()) return pyarrow_wrap_tensor(ctensor) def equals(self, SparseCOOTensor other): """ Return true if sparse tensors contains exactly equal data. """ return self.stp.Equals(deref(other.stp)) def __eq__(self, other): if isinstance(other, SparseCOOTensor): return self.equals(other) else: return NotImplemented @property def is_mutable(self): return self.stp.is_mutable() @property def ndim(self): return self.stp.ndim() @property def shape(self): # Cython knows how to convert a vector[T] to a Python list return tuple(self.stp.shape()) @property def size(self): return self.stp.size() def dim_name(self, i): return frombytes(self.stp.dim_name(i)) @property def dim_names(self): return tuple(frombytes(x) for x in tuple(self.stp.dim_names())) @property def non_zero_length(self): return self.stp.non_zero_length() @property def has_canonical_format(self): cdef: _CSparseCOOIndexPtr csi csi = <_CSparseCOOIndexPtr>(self.stp.sparse_index().get()) if csi != nullptr: return csi.is_canonical() return True cdef class SparseCSRMatrix(_Weakrefable): """ A sparse CSR matrix. """ def __init__(self): raise TypeError("Do not call SparseCSRMatrix's constructor directly, " "use one of the `pyarrow.SparseCSRMatrix.from_*` " "functions instead.") cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor): self.sp_sparse_tensor = sp_sparse_tensor self.stp = sp_sparse_tensor.get() self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): return """ type: {0.type} shape: {0.shape}""".format(self) @classmethod def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCSRMatrix Parameters ---------- obj : numpy.ndarray The dense numpy array that should be converted. dim_names : list, optional The names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @staticmethod def from_numpy(data, indptr, indices, shape, dim_names=None): """ Create arrow::SparseCSRMatrix from numpy.ndarrays. Parameters ---------- data : numpy.ndarray Data used to populate the sparse matrix. indptr : numpy.ndarray Range of the rows, The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. indices : numpy.ndarray Column indices of the corresponding non-zero values. shape : tuple Shape of the matrix. dim_names : list, optional Names of the dimensions. """ cdef shared_ptr[CSparseCSRMatrix] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce precondition for SparseCSRMatrix indices indptr = np.require(indptr, dtype='i8') indices = np.require(indices, dtype='i8') if indptr.ndim != 1: raise ValueError("Expected 1-dimensional array for " "SparseCSRMatrix indptr") if indices.ndim != 1: raise ValueError("Expected 1-dimensional array for " "SparseCSRMatrix indices") check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(), data, indptr, indices, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csr_matrix(csparse_tensor) @staticmethod def from_scipy(obj, dim_names=None): """ Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. Parameters ---------- obj : scipy.sparse.csr_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse if not isinstance(obj, scipy.sparse.csr_matrix): raise TypeError( "Expected scipy.sparse.csr_matrix, got {}".format(type(obj))) cdef shared_ptr[CSparseCSRMatrix] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in obj.shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce precondition for CSparseCSRMatrix indices indptr = np.require(obj.indptr, dtype='i8') indices = np.require(obj.indices, dtype='i8') check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(), obj.data, indptr, indices, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csr_matrix(csparse_tensor) @staticmethod def from_tensor(obj): """ Convert arrow::Tensor to arrow::SparseCSRMatrix. Parameters ---------- obj : Tensor The dense tensor that should be converted. """ cdef shared_ptr[CSparseCSRMatrix] csparse_tensor cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj) with nogil: check_status(TensorToSparseCSRMatrix(ctensor, &csparse_tensor)) return pyarrow_wrap_sparse_csr_matrix(csparse_tensor) def to_numpy(self): """ Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy. """ cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self, &out_data, &out_indptr, &out_indices)) return (PyObject_to_object(out_data), PyObject_to_object(out_indptr), PyObject_to_object(out_indices)) def to_scipy(self): """ Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix. """ from scipy.sparse import csr_matrix cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self, &out_data, &out_indptr, &out_indices)) data = PyObject_to_object(out_data) indptr = PyObject_to_object(out_indptr) indices = PyObject_to_object(out_indices) result = csr_matrix((data[:, 0], indices, indptr), shape=self.shape) return result def to_tensor(self): """ Convert arrow::SparseCSRMatrix to arrow::Tensor. """ cdef shared_ptr[CTensor] ctensor with nogil: ctensor = GetResultValue(self.stp.ToTensor()) return pyarrow_wrap_tensor(ctensor) def equals(self, SparseCSRMatrix other): """ Return true if sparse tensors contains exactly equal data. """ return self.stp.Equals(deref(other.stp)) def __eq__(self, other): if isinstance(other, SparseCSRMatrix): return self.equals(other) else: return NotImplemented @property def is_mutable(self): return self.stp.is_mutable() @property def ndim(self): return self.stp.ndim() @property def shape(self): # Cython knows how to convert a vector[T] to a Python list return tuple(self.stp.shape()) @property def size(self): return self.stp.size() def dim_name(self, i): return frombytes(self.stp.dim_name(i)) @property def dim_names(self): return tuple(frombytes(x) for x in tuple(self.stp.dim_names())) @property def non_zero_length(self): return self.stp.non_zero_length() cdef class SparseCSCMatrix(_Weakrefable): """ A sparse CSC matrix. """ def __init__(self): raise TypeError("Do not call SparseCSCMatrix's constructor directly, " "use one of the `pyarrow.SparseCSCMatrix.from_*` " "functions instead.") cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor): self.sp_sparse_tensor = sp_sparse_tensor self.stp = sp_sparse_tensor.get() self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): return """ type: {0.type} shape: {0.shape}""".format(self) @classmethod def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCSCMatrix """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @staticmethod def from_numpy(data, indptr, indices, shape, dim_names=None): """ Create arrow::SparseCSCMatrix from numpy.ndarrays Parameters ---------- data : numpy.ndarray Data used to populate the sparse matrix. indptr : numpy.ndarray Range of the rows, The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. indices : numpy.ndarray Column indices of the corresponding non-zero values. shape : tuple Shape of the matrix. dim_names : list, optional Names of the dimensions. """ cdef shared_ptr[CSparseCSCMatrix] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce precondition for SparseCSCMatrix indices indptr = np.require(indptr, dtype='i8') indices = np.require(indices, dtype='i8') if indptr.ndim != 1: raise ValueError("Expected 1-dimensional array for " "SparseCSCMatrix indptr") if indices.ndim != 1: raise ValueError("Expected 1-dimensional array for " "SparseCSCMatrix indices") check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(), data, indptr, indices, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csc_matrix(csparse_tensor) @staticmethod def from_scipy(obj, dim_names=None): """ Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix Parameters ---------- obj : scipy.sparse.csc_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse if not isinstance(obj, scipy.sparse.csc_matrix): raise TypeError( "Expected scipy.sparse.csc_matrix, got {}".format(type(obj))) cdef shared_ptr[CSparseCSCMatrix] csparse_tensor cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in obj.shape: c_shape.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce precondition for CSparseCSCMatrix indices indptr = np.require(obj.indptr, dtype='i8') indices = np.require(obj.indices, dtype='i8') check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(), obj.data, indptr, indices, c_shape, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csc_matrix(csparse_tensor) @staticmethod def from_tensor(obj): """ Convert arrow::Tensor to arrow::SparseCSCMatrix Parameters ---------- obj : Tensor The dense tensor that should be converted. """ cdef shared_ptr[CSparseCSCMatrix] csparse_tensor cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj) with nogil: check_status(TensorToSparseCSCMatrix(ctensor, &csparse_tensor)) return pyarrow_wrap_sparse_csc_matrix(csparse_tensor) def to_numpy(self): """ Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy """ cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self, &out_data, &out_indptr, &out_indices)) return (PyObject_to_object(out_data), PyObject_to_object(out_indptr), PyObject_to_object(out_indices)) def to_scipy(self): """ Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix """ from scipy.sparse import csc_matrix cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self, &out_data, &out_indptr, &out_indices)) data = PyObject_to_object(out_data) indptr = PyObject_to_object(out_indptr) indices = PyObject_to_object(out_indices) result = csc_matrix((data[:, 0], indices, indptr), shape=self.shape) return result def to_tensor(self): """ Convert arrow::SparseCSCMatrix to arrow::Tensor """ cdef shared_ptr[CTensor] ctensor with nogil: ctensor = GetResultValue(self.stp.ToTensor()) return pyarrow_wrap_tensor(ctensor) def equals(self, SparseCSCMatrix other): """ Return true if sparse tensors contains exactly equal data """ return self.stp.Equals(deref(other.stp)) def __eq__(self, other): if isinstance(other, SparseCSCMatrix): return self.equals(other) else: return NotImplemented @property def is_mutable(self): return self.stp.is_mutable() @property def ndim(self): return self.stp.ndim() @property def shape(self): # Cython knows how to convert a vector[T] to a Python list return tuple(self.stp.shape()) @property def size(self): return self.stp.size() def dim_name(self, i): return frombytes(self.stp.dim_name(i)) @property def dim_names(self): return tuple(frombytes(x) for x in tuple(self.stp.dim_names())) @property def non_zero_length(self): return self.stp.non_zero_length() cdef class SparseCSFTensor(_Weakrefable): """ A sparse CSF tensor. CSF is a generalization of compressed sparse row (CSR) index. CSF index recursively compresses each dimension of a tensor into a set of prefix trees. Each path from a root to leaf forms one tensor non-zero index. CSF is implemented with two arrays of buffers and one arrays of integers. """ def __init__(self): raise TypeError("Do not call SparseCSFTensor's constructor directly, " "use one of the `pyarrow.SparseCSFTensor.from_*` " "functions instead.") cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor): self.sp_sparse_tensor = sp_sparse_tensor self.stp = sp_sparse_tensor.get() self.type = pyarrow_wrap_data_type(self.stp.type()) def __repr__(self): return """ type: {0.type} shape: {0.shape}""".format(self) @classmethod def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCSFTensor """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @staticmethod def from_numpy(data, indptr, indices, shape, axis_order=None, dim_names=None): """ Create arrow::SparseCSFTensor from numpy.ndarrays Parameters ---------- data : numpy.ndarray Data used to populate the sparse tensor. indptr : numpy.ndarray The sparsity structure. Each two consecutive dimensions in a tensor correspond to a buffer in indices. A pair of consecutive values at `indptr[dim][i]` `indptr[dim][i + 1]` signify a range of nodes in `indices[dim + 1]` who are children of `indices[dim][i]` node. indices : numpy.ndarray Stores values of nodes. Each tensor dimension corresponds to a buffer in indptr. shape : tuple Shape of the matrix. axis_order : list, optional the sequence in which dimensions were traversed to produce the prefix tree. dim_names : list, optional Names of the dimensions. """ cdef shared_ptr[CSparseCSFTensor] csparse_tensor cdef vector[int64_t] c_axis_order cdef vector[int64_t] c_shape cdef vector[c_string] c_dim_names for x in shape: c_shape.push_back(x) if not axis_order: axis_order = np.argsort(shape) for x in axis_order: c_axis_order.push_back(x) if dim_names is not None: for x in dim_names: c_dim_names.push_back(tobytes(x)) # Enforce preconditions for SparseCSFTensor indices if not (isinstance(indptr, (list, tuple)) and isinstance(indices, (list, tuple))): raise TypeError("Expected list or tuple, got {}, {}" .format(type(indptr), type(indices))) if len(indptr) != len(shape) - 1: raise ValueError("Expected list of {ndim} np.arrays for " "SparseCSFTensor.indptr".format(ndim=len(shape))) if len(indices) != len(shape): raise ValueError("Expected list of {ndim} np.arrays for " "SparseCSFTensor.indices".format(ndim=len(shape))) if any([x.ndim != 1 for x in indptr]): raise ValueError("Expected a list of 1-dimensional arrays for " "SparseCSFTensor.indptr") if any([x.ndim != 1 for x in indices]): raise ValueError("Expected a list of 1-dimensional arrays for " "SparseCSFTensor.indices") indptr = [np.require(arr, dtype='i8') for arr in indptr] indices = [np.require(arr, dtype='i8') for arr in indices] check_status(NdarraysToSparseCSFTensor(c_default_memory_pool(), data, indptr, indices, c_shape, c_axis_order, c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csf_tensor(csparse_tensor) @staticmethod def from_tensor(obj): """ Convert arrow::Tensor to arrow::SparseCSFTensor Parameters ---------- obj : Tensor The dense tensor that should be converted. """ cdef shared_ptr[CSparseCSFTensor] csparse_tensor cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj) with nogil: check_status(TensorToSparseCSFTensor(ctensor, &csparse_tensor)) return pyarrow_wrap_sparse_csf_tensor(csparse_tensor) def to_numpy(self): """ Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy """ cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices check_status(SparseCSFTensorToNdarray(self.sp_sparse_tensor, self, &out_data, &out_indptr, &out_indices)) return (PyObject_to_object(out_data), PyObject_to_object(out_indptr), PyObject_to_object(out_indices)) def to_tensor(self): """ Convert arrow::SparseCSFTensor to arrow::Tensor """ cdef shared_ptr[CTensor] ctensor with nogil: ctensor = GetResultValue(self.stp.ToTensor()) return pyarrow_wrap_tensor(ctensor) def equals(self, SparseCSFTensor other): """ Return true if sparse tensors contains exactly equal data """ return self.stp.Equals(deref(other.stp)) def __eq__(self, other): if isinstance(other, SparseCSFTensor): return self.equals(other) else: return NotImplemented @property def is_mutable(self): return self.stp.is_mutable() @property def ndim(self): return self.stp.ndim() @property def shape(self): # Cython knows how to convert a vector[T] to a Python list return tuple(self.stp.shape()) @property def size(self): return self.stp.size() def dim_name(self, i): return frombytes(self.stp.dim_name(i)) @property def dim_names(self): return tuple(frombytes(x) for x in tuple(self.stp.dim_names())) @property def non_zero_length(self): return self.stp.non_zero_length()