diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/python/pyarrow/includes | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/python/pyarrow/includes')
-rw-r--r-- | src/arrow/python/pyarrow/includes/__init__.pxd | 0 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/common.pxd | 138 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow.pxd | 2615 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow_cuda.pxd | 107 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow_dataset.pxd | 478 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow_feather.pxd | 49 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow_flight.pxd | 560 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libarrow_fs.pxd | 296 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libgandiva.pxd | 286 | ||||
-rw-r--r-- | src/arrow/python/pyarrow/includes/libplasma.pxd | 25 |
10 files changed, 4554 insertions, 0 deletions
diff --git a/src/arrow/python/pyarrow/includes/__init__.pxd b/src/arrow/python/pyarrow/includes/__init__.pxd new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/arrow/python/pyarrow/includes/__init__.pxd diff --git a/src/arrow/python/pyarrow/includes/common.pxd b/src/arrow/python/pyarrow/includes/common.pxd new file mode 100644 index 000000000..902eaafbb --- /dev/null +++ b/src/arrow/python/pyarrow/includes/common.pxd @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from libc.stdint cimport * +from libcpp cimport bool as c_bool, nullptr +from libcpp.functional cimport function +from libcpp.memory cimport shared_ptr, unique_ptr, make_shared +from libcpp.string cimport string as c_string +from libcpp.utility cimport pair +from libcpp.vector cimport vector +from libcpp.unordered_map cimport unordered_map +from libcpp.unordered_set cimport unordered_set + +from cpython cimport PyObject +from cpython.datetime cimport PyDateTime_DateTime +cimport cpython + + +cdef extern from * namespace "std" nogil: + cdef shared_ptr[T] static_pointer_cast[T, U](shared_ptr[U]) + +# vendored from the cymove project https://github.com/ozars/cymove +cdef extern from * namespace "cymove" nogil: + """ + #include <type_traits> + #include <utility> + namespace cymove { + template <typename T> + inline typename std::remove_reference<T>::type&& cymove(T& t) { + return std::move(t); + } + template <typename T> + inline typename std::remove_reference<T>::type&& cymove(T&& t) { + return std::move(t); + } + } // namespace cymove + """ + cdef T move" cymove::cymove"[T](T) + +cdef extern from * namespace "arrow::py" nogil: + """ + #include <memory> + #include <utility> + + namespace arrow { + namespace py { + template <typename T> + std::shared_ptr<T> to_shared(std::unique_ptr<T>& t) { + return std::move(t); + } + template <typename T> + std::shared_ptr<T> to_shared(std::unique_ptr<T>&& t) { + return std::move(t); + } + } // namespace py + } // namespace arrow + """ + cdef shared_ptr[T] to_shared" arrow::py::to_shared"[T](unique_ptr[T]) + +cdef extern from "arrow/python/platform.h": + pass + +cdef extern from "<Python.h>": + void Py_XDECREF(PyObject* o) + Py_ssize_t Py_REFCNT(PyObject* o) + +cdef extern from "numpy/halffloat.h": + ctypedef uint16_t npy_half + +cdef extern from "arrow/api.h" namespace "arrow" nogil: + # We can later add more of the common status factory methods as needed + cdef CStatus CStatus_OK "arrow::Status::OK"() + + cdef CStatus CStatus_Invalid "arrow::Status::Invalid"() + cdef CStatus CStatus_NotImplemented \ + "arrow::Status::NotImplemented"(const c_string& msg) + cdef CStatus CStatus_UnknownError \ + "arrow::Status::UnknownError"(const c_string& msg) + + cdef cppclass CStatus "arrow::Status": + CStatus() + + c_string ToString() + c_string message() + shared_ptr[CStatusDetail] detail() + + c_bool ok() + c_bool IsIOError() + c_bool IsOutOfMemory() + c_bool IsInvalid() + c_bool IsKeyError() + c_bool IsNotImplemented() + c_bool IsTypeError() + c_bool IsCapacityError() + c_bool IsIndexError() + c_bool IsSerializationError() + c_bool IsCancelled() + + cdef cppclass CStatusDetail "arrow::StatusDetail": + c_string ToString() + + +cdef extern from "arrow/result.h" namespace "arrow" nogil: + cdef cppclass CResult "arrow::Result"[T]: + CResult() + CResult(CStatus) + CResult(T) + c_bool ok() + CStatus status() + T operator*() + + +cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil: + T GetResultValue[T](CResult[T]) except * + cdef function[F] BindFunction[F](void* unbound, object bound, ...) + + +cdef inline object PyObject_to_object(PyObject* o): + # Cast to "object" increments reference count + cdef object result = <object> o + cpython.Py_DECREF(result) + return result diff --git a/src/arrow/python/pyarrow/includes/libarrow.pxd b/src/arrow/python/pyarrow/includes/libarrow.pxd new file mode 100644 index 000000000..815238f11 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow.pxd @@ -0,0 +1,2615 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.common cimport * + +cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil: + cdef cppclass CKeyValueMetadata" arrow::KeyValueMetadata": + CKeyValueMetadata() + CKeyValueMetadata(const unordered_map[c_string, c_string]&) + CKeyValueMetadata(const vector[c_string]& keys, + const vector[c_string]& values) + + void reserve(int64_t n) + int64_t size() const + c_string key(int64_t i) const + c_string value(int64_t i) const + int FindKey(const c_string& key) const + + shared_ptr[CKeyValueMetadata] Copy() const + c_bool Equals(const CKeyValueMetadata& other) + void Append(const c_string& key, const c_string& value) + void ToUnorderedMap(unordered_map[c_string, c_string]*) const + c_string ToString() const + + CResult[c_string] Get(const c_string& key) const + CStatus Delete(const c_string& key) + CStatus Set(const c_string& key, const c_string& value) + c_bool Contains(const c_string& key) const + + +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal128" arrow::Decimal128": + c_string ToString(int32_t scale) const + + +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal256" arrow::Decimal256": + c_string ToString(int32_t scale) const + + +cdef extern from "arrow/config.h" namespace "arrow" nogil: + cdef cppclass CBuildInfo" arrow::BuildInfo": + int version + int version_major + int version_minor + int version_patch + c_string version_string + c_string so_version + c_string full_so_version + c_string compiler_id + c_string compiler_version + c_string compiler_flags + c_string git_id + c_string git_description + c_string package_kind + + const CBuildInfo& GetBuildInfo() + + cdef cppclass CRuntimeInfo" arrow::RuntimeInfo": + c_string simd_level + c_string detected_simd_level + + CRuntimeInfo GetRuntimeInfo() + + +cdef extern from "arrow/api.h" namespace "arrow" nogil: + cdef enum Type" arrow::Type::type": + _Type_NA" arrow::Type::NA" + + _Type_BOOL" arrow::Type::BOOL" + + _Type_UINT8" arrow::Type::UINT8" + _Type_INT8" arrow::Type::INT8" + _Type_UINT16" arrow::Type::UINT16" + _Type_INT16" arrow::Type::INT16" + _Type_UINT32" arrow::Type::UINT32" + _Type_INT32" arrow::Type::INT32" + _Type_UINT64" arrow::Type::UINT64" + _Type_INT64" arrow::Type::INT64" + + _Type_HALF_FLOAT" arrow::Type::HALF_FLOAT" + _Type_FLOAT" arrow::Type::FLOAT" + _Type_DOUBLE" arrow::Type::DOUBLE" + + _Type_DECIMAL128" arrow::Type::DECIMAL128" + _Type_DECIMAL256" arrow::Type::DECIMAL256" + + _Type_DATE32" arrow::Type::DATE32" + _Type_DATE64" arrow::Type::DATE64" + _Type_TIMESTAMP" arrow::Type::TIMESTAMP" + _Type_TIME32" arrow::Type::TIME32" + _Type_TIME64" arrow::Type::TIME64" + _Type_DURATION" arrow::Type::DURATION" + _Type_INTERVAL_MONTH_DAY_NANO" arrow::Type::INTERVAL_MONTH_DAY_NANO" + + _Type_BINARY" arrow::Type::BINARY" + _Type_STRING" arrow::Type::STRING" + _Type_LARGE_BINARY" arrow::Type::LARGE_BINARY" + _Type_LARGE_STRING" arrow::Type::LARGE_STRING" + _Type_FIXED_SIZE_BINARY" arrow::Type::FIXED_SIZE_BINARY" + + _Type_LIST" arrow::Type::LIST" + _Type_LARGE_LIST" arrow::Type::LARGE_LIST" + _Type_FIXED_SIZE_LIST" arrow::Type::FIXED_SIZE_LIST" + _Type_STRUCT" arrow::Type::STRUCT" + _Type_SPARSE_UNION" arrow::Type::SPARSE_UNION" + _Type_DENSE_UNION" arrow::Type::DENSE_UNION" + _Type_DICTIONARY" arrow::Type::DICTIONARY" + _Type_MAP" arrow::Type::MAP" + + _Type_EXTENSION" arrow::Type::EXTENSION" + + cdef enum UnionMode" arrow::UnionMode::type": + _UnionMode_SPARSE" arrow::UnionMode::SPARSE" + _UnionMode_DENSE" arrow::UnionMode::DENSE" + + cdef enum TimeUnit" arrow::TimeUnit::type": + TimeUnit_SECOND" arrow::TimeUnit::SECOND" + TimeUnit_MILLI" arrow::TimeUnit::MILLI" + TimeUnit_MICRO" arrow::TimeUnit::MICRO" + TimeUnit_NANO" arrow::TimeUnit::NANO" + + cdef cppclass CBufferSpec" arrow::DataTypeLayout::BufferSpec": + pass + + cdef cppclass CDataTypeLayout" arrow::DataTypeLayout": + vector[CBufferSpec] buffers + c_bool has_dictionary + + cdef cppclass CDataType" arrow::DataType": + Type id() + + c_bool Equals(const CDataType& other) + c_bool Equals(const shared_ptr[CDataType]& other) + + shared_ptr[CField] field(int i) + const vector[shared_ptr[CField]] fields() + int num_fields() + CDataTypeLayout layout() + c_string ToString() + + c_bool is_primitive(Type type) + + cdef cppclass CArrayData" arrow::ArrayData": + shared_ptr[CDataType] type + int64_t length + int64_t null_count + int64_t offset + vector[shared_ptr[CBuffer]] buffers + vector[shared_ptr[CArrayData]] child_data + shared_ptr[CArrayData] dictionary + + @staticmethod + shared_ptr[CArrayData] Make(const shared_ptr[CDataType]& type, + int64_t length, + vector[shared_ptr[CBuffer]]& buffers, + int64_t null_count, + int64_t offset) + + @staticmethod + shared_ptr[CArrayData] MakeWithChildren" Make"( + const shared_ptr[CDataType]& type, + int64_t length, + vector[shared_ptr[CBuffer]]& buffers, + vector[shared_ptr[CArrayData]]& child_data, + int64_t null_count, + int64_t offset) + + @staticmethod + shared_ptr[CArrayData] MakeWithChildrenAndDictionary" Make"( + const shared_ptr[CDataType]& type, + int64_t length, + vector[shared_ptr[CBuffer]]& buffers, + vector[shared_ptr[CArrayData]]& child_data, + shared_ptr[CArrayData]& dictionary, + int64_t null_count, + int64_t offset) + + cdef cppclass CArray" arrow::Array": + shared_ptr[CDataType] type() + + int64_t length() + int64_t null_count() + int64_t offset() + Type type_id() + + int num_fields() + + CResult[shared_ptr[CScalar]] GetScalar(int64_t i) const + + c_string Diff(const CArray& other) + c_bool Equals(const CArray& arr) + c_bool IsNull(int i) + + shared_ptr[CArrayData] data() + + shared_ptr[CArray] Slice(int64_t offset) + shared_ptr[CArray] Slice(int64_t offset, int64_t length) + + CStatus Validate() const + CStatus ValidateFull() const + CResult[shared_ptr[CArray]] View(const shared_ptr[CDataType]& type) + + shared_ptr[CArray] MakeArray(const shared_ptr[CArrayData]& data) + CResult[shared_ptr[CArray]] MakeArrayOfNull( + const shared_ptr[CDataType]& type, int64_t length, CMemoryPool* pool) + + CResult[shared_ptr[CArray]] MakeArrayFromScalar( + const CScalar& scalar, int64_t length, CMemoryPool* pool) + + CStatus DebugPrint(const CArray& arr, int indent) + + cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType): + int bit_width() + + cdef cppclass CNullArray" arrow::NullArray"(CArray): + CNullArray(int64_t length) + + cdef cppclass CDictionaryArray" arrow::DictionaryArray"(CArray): + CDictionaryArray(const shared_ptr[CDataType]& type, + const shared_ptr[CArray]& indices, + const shared_ptr[CArray]& dictionary) + + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const shared_ptr[CDataType]& type, + const shared_ptr[CArray]& indices, + const shared_ptr[CArray]& dictionary) + + shared_ptr[CArray] indices() + shared_ptr[CArray] dictionary() + + cdef cppclass CDate32Type" arrow::Date32Type"(CFixedWidthType): + pass + + cdef cppclass CDate64Type" arrow::Date64Type"(CFixedWidthType): + pass + + cdef cppclass CTimestampType" arrow::TimestampType"(CFixedWidthType): + CTimestampType(TimeUnit unit) + TimeUnit unit() + const c_string& timezone() + + cdef cppclass CTime32Type" arrow::Time32Type"(CFixedWidthType): + TimeUnit unit() + + cdef cppclass CTime64Type" arrow::Time64Type"(CFixedWidthType): + TimeUnit unit() + + shared_ptr[CDataType] ctime32" arrow::time32"(TimeUnit unit) + shared_ptr[CDataType] ctime64" arrow::time64"(TimeUnit unit) + + cdef cppclass CDurationType" arrow::DurationType"(CFixedWidthType): + TimeUnit unit() + + shared_ptr[CDataType] cduration" arrow::duration"(TimeUnit unit) + + cdef cppclass CDictionaryType" arrow::DictionaryType"(CFixedWidthType): + CDictionaryType(const shared_ptr[CDataType]& index_type, + const shared_ptr[CDataType]& value_type, + c_bool ordered) + + shared_ptr[CDataType] index_type() + shared_ptr[CDataType] value_type() + c_bool ordered() + + shared_ptr[CDataType] ctimestamp" arrow::timestamp"(TimeUnit unit) + shared_ptr[CDataType] ctimestamp" arrow::timestamp"( + TimeUnit unit, const c_string& timezone) + + cdef cppclass CMemoryPool" arrow::MemoryPool": + int64_t bytes_allocated() + int64_t max_memory() + c_string backend_name() + void ReleaseUnused() + + cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool): + CLoggingMemoryPool(CMemoryPool*) + + cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool): + CProxyMemoryPool(CMemoryPool*) + + cdef cppclass CBuffer" arrow::Buffer": + CBuffer(const uint8_t* data, int64_t size) + const uint8_t* data() + uint8_t* mutable_data() + uintptr_t address() + uintptr_t mutable_address() + int64_t size() + shared_ptr[CBuffer] parent() + c_bool is_cpu() const + c_bool is_mutable() const + c_string ToHexString() + c_bool Equals(const CBuffer& other) + + shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer, + int64_t offset, int64_t length) + shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer, + int64_t offset) + + cdef cppclass CMutableBuffer" arrow::MutableBuffer"(CBuffer): + CMutableBuffer(const uint8_t* data, int64_t size) + + cdef cppclass CResizableBuffer" arrow::ResizableBuffer"(CMutableBuffer): + CStatus Resize(const int64_t new_size, c_bool shrink_to_fit) + CStatus Reserve(const int64_t new_size) + + CResult[unique_ptr[CBuffer]] AllocateBuffer(const int64_t size, + CMemoryPool* pool) + + CResult[unique_ptr[CResizableBuffer]] AllocateResizableBuffer( + const int64_t size, CMemoryPool* pool) + + cdef CMemoryPool* c_default_memory_pool" arrow::default_memory_pool"() + cdef CMemoryPool* c_system_memory_pool" arrow::system_memory_pool"() + cdef CStatus c_jemalloc_memory_pool" arrow::jemalloc_memory_pool"( + CMemoryPool** out) + cdef CStatus c_mimalloc_memory_pool" arrow::mimalloc_memory_pool"( + CMemoryPool** out) + + CStatus c_jemalloc_set_decay_ms" arrow::jemalloc_set_decay_ms"(int ms) + + cdef cppclass CListType" arrow::ListType"(CDataType): + CListType(const shared_ptr[CDataType]& value_type) + CListType(const shared_ptr[CField]& field) + shared_ptr[CDataType] value_type() + shared_ptr[CField] value_field() + + cdef cppclass CLargeListType" arrow::LargeListType"(CDataType): + CLargeListType(const shared_ptr[CDataType]& value_type) + CLargeListType(const shared_ptr[CField]& field) + shared_ptr[CDataType] value_type() + shared_ptr[CField] value_field() + + cdef cppclass CMapType" arrow::MapType"(CDataType): + CMapType(const shared_ptr[CField]& key_field, + const shared_ptr[CField]& item_field, c_bool keys_sorted) + shared_ptr[CDataType] key_type() + shared_ptr[CField] key_field() + shared_ptr[CDataType] item_type() + shared_ptr[CField] item_field() + c_bool keys_sorted() + + cdef cppclass CFixedSizeListType" arrow::FixedSizeListType"(CDataType): + CFixedSizeListType(const shared_ptr[CDataType]& value_type, + int32_t list_size) + CFixedSizeListType(const shared_ptr[CField]& field, int32_t list_size) + shared_ptr[CDataType] value_type() + shared_ptr[CField] value_field() + int32_t list_size() + + cdef cppclass CStringType" arrow::StringType"(CDataType): + pass + + cdef cppclass CFixedSizeBinaryType \ + " arrow::FixedSizeBinaryType"(CFixedWidthType): + CFixedSizeBinaryType(int byte_width) + int byte_width() + int bit_width() + + cdef cppclass CDecimal128Type \ + " arrow::Decimal128Type"(CFixedSizeBinaryType): + CDecimal128Type(int precision, int scale) + int precision() + int scale() + + cdef cppclass CDecimal256Type \ + " arrow::Decimal256Type"(CFixedSizeBinaryType): + CDecimal256Type(int precision, int scale) + int precision() + int scale() + + cdef cppclass CField" arrow::Field": + cppclass CMergeOptions "arrow::Field::MergeOptions": + c_bool promote_nullability + + @staticmethod + CMergeOptions Defaults() + + const c_string& name() + shared_ptr[CDataType] type() + c_bool nullable() + + c_string ToString() + c_bool Equals(const CField& other, c_bool check_metadata) + + shared_ptr[const CKeyValueMetadata] metadata() + + CField(const c_string& name, const shared_ptr[CDataType]& type, + c_bool nullable) + + CField(const c_string& name, const shared_ptr[CDataType]& type, + c_bool nullable, const shared_ptr[CKeyValueMetadata]& metadata) + + # Removed const in Cython so don't have to cast to get code to generate + shared_ptr[CField] AddMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + shared_ptr[CField] WithMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + shared_ptr[CField] RemoveMetadata() + shared_ptr[CField] WithType(const shared_ptr[CDataType]& type) + shared_ptr[CField] WithName(const c_string& name) + shared_ptr[CField] WithNullable(c_bool nullable) + vector[shared_ptr[CField]] Flatten() + + cdef cppclass CFieldRef" arrow::FieldRef": + CFieldRef() + CFieldRef(c_string name) + CFieldRef(int index) + const c_string* name() const + + cdef cppclass CFieldRefHash" arrow::FieldRef::Hash": + pass + + cdef cppclass CStructType" arrow::StructType"(CDataType): + CStructType(const vector[shared_ptr[CField]]& fields) + + shared_ptr[CField] GetFieldByName(const c_string& name) + vector[shared_ptr[CField]] GetAllFieldsByName(const c_string& name) + int GetFieldIndex(const c_string& name) + vector[int] GetAllFieldIndices(const c_string& name) + + cdef cppclass CUnionType" arrow::UnionType"(CDataType): + UnionMode mode() + const vector[int8_t]& type_codes() + const vector[int]& child_ids() + + cdef shared_ptr[CDataType] CMakeSparseUnionType" arrow::sparse_union"( + vector[shared_ptr[CField]] fields, + vector[int8_t] type_codes) + + cdef shared_ptr[CDataType] CMakeDenseUnionType" arrow::dense_union"( + vector[shared_ptr[CField]] fields, + vector[int8_t] type_codes) + + cdef cppclass CSchema" arrow::Schema": + CSchema(const vector[shared_ptr[CField]]& fields) + CSchema(const vector[shared_ptr[CField]]& fields, + const shared_ptr[const CKeyValueMetadata]& metadata) + + # Does not actually exist, but gets Cython to not complain + CSchema(const vector[shared_ptr[CField]]& fields, + const shared_ptr[CKeyValueMetadata]& metadata) + + c_bool Equals(const CSchema& other, c_bool check_metadata) + + shared_ptr[CField] field(int i) + shared_ptr[const CKeyValueMetadata] metadata() + shared_ptr[CField] GetFieldByName(const c_string& name) + vector[shared_ptr[CField]] GetAllFieldsByName(const c_string& name) + int GetFieldIndex(const c_string& name) + vector[int] GetAllFieldIndices(const c_string& name) + int num_fields() + c_string ToString() + + CResult[shared_ptr[CSchema]] AddField(int i, + const shared_ptr[CField]& field) + CResult[shared_ptr[CSchema]] RemoveField(int i) + CResult[shared_ptr[CSchema]] SetField(int i, + const shared_ptr[CField]& field) + + # Removed const in Cython so don't have to cast to get code to generate + shared_ptr[CSchema] AddMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + shared_ptr[CSchema] WithMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + shared_ptr[CSchema] RemoveMetadata() + + CResult[shared_ptr[CSchema]] UnifySchemas( + const vector[shared_ptr[CSchema]]& schemas) + + cdef cppclass PrettyPrintOptions: + PrettyPrintOptions() + PrettyPrintOptions(int indent_arg) + PrettyPrintOptions(int indent_arg, int window_arg) + int indent + int indent_size + int window + c_string null_rep + c_bool skip_new_lines + c_bool truncate_metadata + c_bool show_field_metadata + c_bool show_schema_metadata + + @staticmethod + PrettyPrintOptions Defaults() + + CStatus PrettyPrint(const CArray& schema, + const PrettyPrintOptions& options, + c_string* result) + CStatus PrettyPrint(const CChunkedArray& schema, + const PrettyPrintOptions& options, + c_string* result) + CStatus PrettyPrint(const CSchema& schema, + const PrettyPrintOptions& options, + c_string* result) + + cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray): + c_bool Value(int i) + int64_t false_count() + int64_t true_count() + + cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray): + uint8_t Value(int i) + + cdef cppclass CInt8Array" arrow::Int8Array"(CArray): + int8_t Value(int i) + + cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray): + uint16_t Value(int i) + + cdef cppclass CInt16Array" arrow::Int16Array"(CArray): + int16_t Value(int i) + + cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray): + uint32_t Value(int i) + + cdef cppclass CInt32Array" arrow::Int32Array"(CArray): + int32_t Value(int i) + + cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray): + uint64_t Value(int i) + + cdef cppclass CInt64Array" arrow::Int64Array"(CArray): + int64_t Value(int i) + + cdef cppclass CDate32Array" arrow::Date32Array"(CArray): + int32_t Value(int i) + + cdef cppclass CDate64Array" arrow::Date64Array"(CArray): + int64_t Value(int i) + + cdef cppclass CTime32Array" arrow::Time32Array"(CArray): + int32_t Value(int i) + + cdef cppclass CTime64Array" arrow::Time64Array"(CArray): + int64_t Value(int i) + + cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray): + int64_t Value(int i) + + cdef cppclass CDurationArray" arrow::DurationArray"(CArray): + int64_t Value(int i) + + cdef cppclass CMonthDayNanoIntervalArray \ + "arrow::MonthDayNanoIntervalArray"(CArray): + pass + + cdef cppclass CHalfFloatArray" arrow::HalfFloatArray"(CArray): + uint16_t Value(int i) + + cdef cppclass CFloatArray" arrow::FloatArray"(CArray): + float Value(int i) + + cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray): + double Value(int i) + + cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray): + const uint8_t* GetValue(int i) + + cdef cppclass CDecimal128Array" arrow::Decimal128Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + + cdef cppclass CDecimal256Array" arrow::Decimal256Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + + cdef cppclass CListArray" arrow::ListArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const CArray& offsets, const CArray& values, CMemoryPool* pool) + + const int32_t* raw_value_offsets() + int32_t value_offset(int i) + int32_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CArray] offsets() + shared_ptr[CDataType] value_type() + + cdef cppclass CLargeListArray" arrow::LargeListArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const CArray& offsets, const CArray& values, CMemoryPool* pool) + + int64_t value_offset(int i) + int64_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CArray] offsets() + shared_ptr[CDataType] value_type() + + cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const shared_ptr[CArray]& values, int32_t list_size) + + int64_t value_offset(int i) + int64_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CDataType] value_type() + + cdef cppclass CMapArray" arrow::MapArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const shared_ptr[CArray]& offsets, + const shared_ptr[CArray]& keys, + const shared_ptr[CArray]& items, + CMemoryPool* pool) + + shared_ptr[CArray] keys() + shared_ptr[CArray] items() + CMapType* map_type() + int64_t value_offset(int i) + int64_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CDataType] value_type() + + cdef cppclass CUnionArray" arrow::UnionArray"(CArray): + shared_ptr[CBuffer] type_codes() + int8_t* raw_type_codes() + int child_id(int64_t index) + shared_ptr[CArray] field(int pos) + const CArray* UnsafeField(int pos) + UnionMode mode() + + cdef cppclass CSparseUnionArray" arrow::SparseUnionArray"(CUnionArray): + @staticmethod + CResult[shared_ptr[CArray]] Make( + const CArray& type_codes, + const vector[shared_ptr[CArray]]& children, + const vector[c_string]& field_names, + const vector[int8_t]& type_codes) + + cdef cppclass CDenseUnionArray" arrow::DenseUnionArray"(CUnionArray): + @staticmethod + CResult[shared_ptr[CArray]] Make( + const CArray& type_codes, + const CArray& value_offsets, + const vector[shared_ptr[CArray]]& children, + const vector[c_string]& field_names, + const vector[int8_t]& type_codes) + + int32_t value_offset(int i) + shared_ptr[CBuffer] value_offsets() + + cdef cppclass CBinaryArray" arrow::BinaryArray"(CArray): + const uint8_t* GetValue(int i, int32_t* length) + shared_ptr[CBuffer] value_data() + int32_t value_offset(int64_t i) + int32_t value_length(int64_t i) + int32_t total_values_length() + + cdef cppclass CLargeBinaryArray" arrow::LargeBinaryArray"(CArray): + const uint8_t* GetValue(int i, int64_t* length) + shared_ptr[CBuffer] value_data() + int64_t value_offset(int64_t i) + int64_t value_length(int64_t i) + int64_t total_values_length() + + cdef cppclass CStringArray" arrow::StringArray"(CBinaryArray): + CStringArray(int64_t length, shared_ptr[CBuffer] value_offsets, + shared_ptr[CBuffer] data, + shared_ptr[CBuffer] null_bitmap, + int64_t null_count, + int64_t offset) + + c_string GetString(int i) + + cdef cppclass CLargeStringArray" arrow::LargeStringArray" \ + (CLargeBinaryArray): + CLargeStringArray(int64_t length, shared_ptr[CBuffer] value_offsets, + shared_ptr[CBuffer] data, + shared_ptr[CBuffer] null_bitmap, + int64_t null_count, + int64_t offset) + + c_string GetString(int i) + + cdef cppclass CStructArray" arrow::StructArray"(CArray): + CStructArray(shared_ptr[CDataType]& type, int64_t length, + vector[shared_ptr[CArray]]& children, + shared_ptr[CBuffer] null_bitmap=nullptr, + int64_t null_count=-1, + int64_t offset=0) + + # XXX Cython crashes if default argument values are declared here + # https://github.com/cython/cython/issues/2167 + @staticmethod + CResult[shared_ptr[CArray]] MakeFromFieldNames "Make"( + vector[shared_ptr[CArray]] children, + vector[c_string] field_names, + shared_ptr[CBuffer] null_bitmap, + int64_t null_count, + int64_t offset) + + @staticmethod + CResult[shared_ptr[CArray]] MakeFromFields "Make"( + vector[shared_ptr[CArray]] children, + vector[shared_ptr[CField]] fields, + shared_ptr[CBuffer] null_bitmap, + int64_t null_count, + int64_t offset) + + shared_ptr[CArray] field(int pos) + shared_ptr[CArray] GetFieldByName(const c_string& name) const + + CResult[vector[shared_ptr[CArray]]] Flatten(CMemoryPool* pool) + + cdef cppclass CChunkedArray" arrow::ChunkedArray": + CChunkedArray(const vector[shared_ptr[CArray]]& arrays) + CChunkedArray(const vector[shared_ptr[CArray]]& arrays, + const shared_ptr[CDataType]& type) + int64_t length() + int64_t null_count() + int num_chunks() + c_bool Equals(const CChunkedArray& other) + + shared_ptr[CArray] chunk(int i) + shared_ptr[CDataType] type() + shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const + shared_ptr[CChunkedArray] Slice(int64_t offset) const + + CResult[vector[shared_ptr[CChunkedArray]]] Flatten(CMemoryPool* pool) + + CStatus Validate() const + CStatus ValidateFull() const + + cdef cppclass CRecordBatch" arrow::RecordBatch": + @staticmethod + shared_ptr[CRecordBatch] Make( + const shared_ptr[CSchema]& schema, int64_t num_rows, + const vector[shared_ptr[CArray]]& columns) + + @staticmethod + CResult[shared_ptr[CRecordBatch]] FromStructArray( + const shared_ptr[CArray]& array) + + c_bool Equals(const CRecordBatch& other, c_bool check_metadata) + + shared_ptr[CSchema] schema() + shared_ptr[CArray] column(int i) + const c_string& column_name(int i) + + const vector[shared_ptr[CArray]]& columns() + + int num_columns() + int64_t num_rows() + + CStatus Validate() const + CStatus ValidateFull() const + + shared_ptr[CRecordBatch] ReplaceSchemaMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + + shared_ptr[CRecordBatch] Slice(int64_t offset) + shared_ptr[CRecordBatch] Slice(int64_t offset, int64_t length) + + cdef cppclass CTable" arrow::Table": + CTable(const shared_ptr[CSchema]& schema, + const vector[shared_ptr[CChunkedArray]]& columns) + + @staticmethod + shared_ptr[CTable] Make( + const shared_ptr[CSchema]& schema, + const vector[shared_ptr[CChunkedArray]]& columns) + + @staticmethod + shared_ptr[CTable] MakeFromArrays" Make"( + const shared_ptr[CSchema]& schema, + const vector[shared_ptr[CArray]]& arrays) + + @staticmethod + CResult[shared_ptr[CTable]] FromRecordBatches( + const shared_ptr[CSchema]& schema, + const vector[shared_ptr[CRecordBatch]]& batches) + + int num_columns() + int64_t num_rows() + + c_bool Equals(const CTable& other, c_bool check_metadata) + + shared_ptr[CSchema] schema() + shared_ptr[CChunkedArray] column(int i) + shared_ptr[CField] field(int i) + + CResult[shared_ptr[CTable]] AddColumn( + int i, shared_ptr[CField] field, shared_ptr[CChunkedArray] column) + CResult[shared_ptr[CTable]] RemoveColumn(int i) + CResult[shared_ptr[CTable]] SetColumn( + int i, shared_ptr[CField] field, shared_ptr[CChunkedArray] column) + + vector[c_string] ColumnNames() + CResult[shared_ptr[CTable]] RenameColumns(const vector[c_string]&) + CResult[shared_ptr[CTable]] SelectColumns(const vector[int]&) + + CResult[shared_ptr[CTable]] Flatten(CMemoryPool* pool) + + CResult[shared_ptr[CTable]] CombineChunks(CMemoryPool* pool) + + CStatus Validate() const + CStatus ValidateFull() const + + shared_ptr[CTable] ReplaceSchemaMetadata( + const shared_ptr[CKeyValueMetadata]& metadata) + + shared_ptr[CTable] Slice(int64_t offset) + shared_ptr[CTable] Slice(int64_t offset, int64_t length) + + cdef cppclass CRecordBatchReader" arrow::RecordBatchReader": + shared_ptr[CSchema] schema() + CStatus ReadNext(shared_ptr[CRecordBatch]* batch) + CStatus ReadAll(shared_ptr[CTable]* out) + + cdef cppclass TableBatchReader(CRecordBatchReader): + TableBatchReader(const CTable& table) + void set_chunksize(int64_t chunksize) + + cdef cppclass CTensor" arrow::Tensor": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + + const vector[int64_t]& shape() + const vector[int64_t]& strides() + int64_t size() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + c_bool is_contiguous() + Type type_id() + c_bool Equals(const CTensor& other) + + cdef cppclass CSparseIndex" arrow::SparseIndex": + pass + + cdef cppclass CSparseCOOIndex" arrow::SparseCOOIndex": + c_bool is_canonical() + + cdef cppclass CSparseCOOTensor" arrow::SparseCOOTensor": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + CResult[shared_ptr[CTensor]] ToTensor() + + shared_ptr[CSparseIndex] sparse_index() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseCOOTensor& other) + + cdef cppclass CSparseCSRMatrix" arrow::SparseCSRMatrix": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + CResult[shared_ptr[CTensor]] ToTensor() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseCSRMatrix& other) + + cdef cppclass CSparseCSCMatrix" arrow::SparseCSCMatrix": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + CResult[shared_ptr[CTensor]] ToTensor() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseCSCMatrix& other) + + cdef cppclass CSparseCSFTensor" arrow::SparseCSFTensor": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + CResult[shared_ptr[CTensor]] ToTensor() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseCSFTensor& other) + + cdef cppclass CScalar" arrow::Scalar": + CScalar(shared_ptr[CDataType]) + + shared_ptr[CDataType] type + c_bool is_valid + + c_string ToString() const + c_bool Equals(const CScalar& other) const + CStatus Validate() const + CStatus ValidateFull() const + CResult[shared_ptr[CScalar]] CastTo(shared_ptr[CDataType] to) const + + cdef cppclass CScalarHash" arrow::Scalar::Hash": + size_t operator()(const shared_ptr[CScalar]& scalar) const + + cdef cppclass CNullScalar" arrow::NullScalar"(CScalar): + CNullScalar() + + cdef cppclass CBooleanScalar" arrow::BooleanScalar"(CScalar): + c_bool value + + cdef cppclass CInt8Scalar" arrow::Int8Scalar"(CScalar): + int8_t value + + cdef cppclass CUInt8Scalar" arrow::UInt8Scalar"(CScalar): + uint8_t value + + cdef cppclass CInt16Scalar" arrow::Int16Scalar"(CScalar): + int16_t value + + cdef cppclass CUInt16Scalar" arrow::UInt16Scalar"(CScalar): + uint16_t value + + cdef cppclass CInt32Scalar" arrow::Int32Scalar"(CScalar): + int32_t value + + cdef cppclass CUInt32Scalar" arrow::UInt32Scalar"(CScalar): + uint32_t value + + cdef cppclass CInt64Scalar" arrow::Int64Scalar"(CScalar): + int64_t value + + cdef cppclass CUInt64Scalar" arrow::UInt64Scalar"(CScalar): + uint64_t value + + cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar): + npy_half value + + cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar): + float value + + cdef cppclass CDoubleScalar" arrow::DoubleScalar"(CScalar): + double value + + cdef cppclass CDecimal128Scalar" arrow::Decimal128Scalar"(CScalar): + CDecimal128 value + + cdef cppclass CDecimal256Scalar" arrow::Decimal256Scalar"(CScalar): + CDecimal256 value + + cdef cppclass CDate32Scalar" arrow::Date32Scalar"(CScalar): + int32_t value + + cdef cppclass CDate64Scalar" arrow::Date64Scalar"(CScalar): + int64_t value + + cdef cppclass CTime32Scalar" arrow::Time32Scalar"(CScalar): + int32_t value + + cdef cppclass CTime64Scalar" arrow::Time64Scalar"(CScalar): + int64_t value + + cdef cppclass CTimestampScalar" arrow::TimestampScalar"(CScalar): + int64_t value + + cdef cppclass CDurationScalar" arrow::DurationScalar"(CScalar): + int64_t value + + cdef cppclass CMonthDayNanoIntervalScalar \ + "arrow::MonthDayNanoIntervalScalar"(CScalar): + pass + + cdef cppclass CBaseBinaryScalar" arrow::BaseBinaryScalar"(CScalar): + shared_ptr[CBuffer] value + + cdef cppclass CBaseListScalar" arrow::BaseListScalar"(CScalar): + shared_ptr[CArray] value + + cdef cppclass CListScalar" arrow::ListScalar"(CBaseListScalar): + pass + + cdef cppclass CMapScalar" arrow::MapScalar"(CListScalar): + pass + + cdef cppclass CStructScalar" arrow::StructScalar"(CScalar): + vector[shared_ptr[CScalar]] value + CResult[shared_ptr[CScalar]] field(CFieldRef ref) const + + cdef cppclass CDictionaryScalarIndexAndDictionary \ + "arrow::DictionaryScalar::ValueType": + shared_ptr[CScalar] index + shared_ptr[CArray] dictionary + + cdef cppclass CDictionaryScalar" arrow::DictionaryScalar"(CScalar): + CDictionaryScalar(CDictionaryScalarIndexAndDictionary value, + shared_ptr[CDataType], c_bool is_valid) + CDictionaryScalarIndexAndDictionary value + + CResult[shared_ptr[CScalar]] GetEncodedValue() + + cdef cppclass CUnionScalar" arrow::UnionScalar"(CScalar): + shared_ptr[CScalar] value + int8_t type_code + + cdef cppclass CExtensionScalar" arrow::ExtensionScalar"(CScalar): + shared_ptr[CScalar] value + + shared_ptr[CScalar] MakeScalar[Value](Value value) + + cdef cppclass CConcatenateTablesOptions" arrow::ConcatenateTablesOptions": + c_bool unify_schemas + CField.CMergeOptions field_merge_options + + @staticmethod + CConcatenateTablesOptions Defaults() + + CResult[shared_ptr[CTable]] ConcatenateTables( + const vector[shared_ptr[CTable]]& tables, + CConcatenateTablesOptions options, + CMemoryPool* memory_pool) + + cdef cppclass CDictionaryUnifier" arrow::DictionaryUnifier": + @staticmethod + CResult[shared_ptr[CChunkedArray]] UnifyChunkedArray( + shared_ptr[CChunkedArray] array, CMemoryPool* pool) + + @staticmethod + CResult[shared_ptr[CTable]] UnifyTable( + const CTable& table, CMemoryPool* pool) + + +cdef extern from "arrow/builder.h" namespace "arrow" nogil: + + cdef cppclass CArrayBuilder" arrow::ArrayBuilder": + CArrayBuilder(shared_ptr[CDataType], CMemoryPool* pool) + + int64_t length() + int64_t null_count() + CStatus AppendNull() + CStatus Finish(shared_ptr[CArray]* out) + CStatus Reserve(int64_t additional_capacity) + + cdef cppclass CBooleanBuilder" arrow::BooleanBuilder"(CArrayBuilder): + CBooleanBuilder(CMemoryPool* pool) + CStatus Append(const c_bool val) + CStatus Append(const uint8_t val) + + cdef cppclass CInt8Builder" arrow::Int8Builder"(CArrayBuilder): + CInt8Builder(CMemoryPool* pool) + CStatus Append(const int8_t value) + + cdef cppclass CInt16Builder" arrow::Int16Builder"(CArrayBuilder): + CInt16Builder(CMemoryPool* pool) + CStatus Append(const int16_t value) + + cdef cppclass CInt32Builder" arrow::Int32Builder"(CArrayBuilder): + CInt32Builder(CMemoryPool* pool) + CStatus Append(const int32_t value) + + cdef cppclass CInt64Builder" arrow::Int64Builder"(CArrayBuilder): + CInt64Builder(CMemoryPool* pool) + CStatus Append(const int64_t value) + + cdef cppclass CUInt8Builder" arrow::UInt8Builder"(CArrayBuilder): + CUInt8Builder(CMemoryPool* pool) + CStatus Append(const uint8_t value) + + cdef cppclass CUInt16Builder" arrow::UInt16Builder"(CArrayBuilder): + CUInt16Builder(CMemoryPool* pool) + CStatus Append(const uint16_t value) + + cdef cppclass CUInt32Builder" arrow::UInt32Builder"(CArrayBuilder): + CUInt32Builder(CMemoryPool* pool) + CStatus Append(const uint32_t value) + + cdef cppclass CUInt64Builder" arrow::UInt64Builder"(CArrayBuilder): + CUInt64Builder(CMemoryPool* pool) + CStatus Append(const uint64_t value) + + cdef cppclass CHalfFloatBuilder" arrow::HalfFloatBuilder"(CArrayBuilder): + CHalfFloatBuilder(CMemoryPool* pool) + + cdef cppclass CFloatBuilder" arrow::FloatBuilder"(CArrayBuilder): + CFloatBuilder(CMemoryPool* pool) + CStatus Append(const float value) + + cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(CArrayBuilder): + CDoubleBuilder(CMemoryPool* pool) + CStatus Append(const double value) + + cdef cppclass CBinaryBuilder" arrow::BinaryBuilder"(CArrayBuilder): + CArrayBuilder(shared_ptr[CDataType], CMemoryPool* pool) + CStatus Append(const char* value, int32_t length) + + cdef cppclass CStringBuilder" arrow::StringBuilder"(CBinaryBuilder): + CStringBuilder(CMemoryPool* pool) + + CStatus Append(const c_string& value) + + cdef cppclass CTimestampBuilder "arrow::TimestampBuilder"(CArrayBuilder): + CTimestampBuilder(const shared_ptr[CDataType] typ, CMemoryPool* pool) + CStatus Append(const int64_t value) + + cdef cppclass CDate32Builder "arrow::Date32Builder"(CArrayBuilder): + CDate32Builder(CMemoryPool* pool) + CStatus Append(const int32_t value) + + cdef cppclass CDate64Builder "arrow::Date64Builder"(CArrayBuilder): + CDate64Builder(CMemoryPool* pool) + CStatus Append(const int64_t value) + + +# Use typedef to emulate syntax for std::function<void(..)> +ctypedef void CallbackTransform(object, const shared_ptr[CBuffer]& src, + shared_ptr[CBuffer]* dest) + + +cdef extern from "arrow/util/cancel.h" namespace "arrow" nogil: + cdef cppclass CStopToken "arrow::StopToken": + CStatus Poll() + c_bool IsStopRequested() + + cdef cppclass CStopSource "arrow::StopSource": + CStopToken token() + + CResult[CStopSource*] SetSignalStopSource() + void ResetSignalStopSource() + + CStatus RegisterCancellingSignalHandler(vector[int] signals) + void UnregisterCancellingSignalHandler() + + +cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil: + cdef enum FileMode" arrow::io::FileMode::type": + FileMode_READ" arrow::io::FileMode::READ" + FileMode_WRITE" arrow::io::FileMode::WRITE" + FileMode_READWRITE" arrow::io::FileMode::READWRITE" + + cdef enum ObjectType" arrow::io::ObjectType::type": + ObjectType_FILE" arrow::io::ObjectType::FILE" + ObjectType_DIRECTORY" arrow::io::ObjectType::DIRECTORY" + + cdef cppclass CIOContext" arrow::io::IOContext": + CIOContext() + CIOContext(CStopToken) + CIOContext(CMemoryPool*) + CIOContext(CMemoryPool*, CStopToken) + + CIOContext c_default_io_context "arrow::io::default_io_context"() + int GetIOThreadPoolCapacity() + CStatus SetIOThreadPoolCapacity(int threads) + + cdef cppclass FileStatistics: + int64_t size + ObjectType kind + + cdef cppclass FileInterface: + CStatus Close() + CResult[int64_t] Tell() + FileMode mode() + c_bool closed() + + cdef cppclass Readable: + # put overload under a different name to avoid cython bug with multiple + # layers of inheritance + CResult[shared_ptr[CBuffer]] ReadBuffer" Read"(int64_t nbytes) + CResult[int64_t] Read(int64_t nbytes, uint8_t* out) + + cdef cppclass Seekable: + CStatus Seek(int64_t position) + + cdef cppclass Writable: + CStatus WriteBuffer" Write"(shared_ptr[CBuffer] data) + CStatus Write(const uint8_t* data, int64_t nbytes) + CStatus Flush() + + cdef cppclass COutputStream" arrow::io::OutputStream"(FileInterface, + Writable): + pass + + cdef cppclass CInputStream" arrow::io::InputStream"(FileInterface, + Readable): + CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata() + + cdef cppclass CRandomAccessFile" arrow::io::RandomAccessFile"(CInputStream, + Seekable): + CResult[int64_t] GetSize() + + CResult[int64_t] ReadAt(int64_t position, int64_t nbytes, + uint8_t* buffer) + CResult[shared_ptr[CBuffer]] ReadAt(int64_t position, int64_t nbytes) + c_bool supports_zero_copy() + + cdef cppclass WritableFile(COutputStream, Seekable): + CStatus WriteAt(int64_t position, const uint8_t* data, + int64_t nbytes) + + cdef cppclass ReadWriteFileInterface(CRandomAccessFile, + WritableFile): + pass + + cdef cppclass CIOFileSystem" arrow::io::FileSystem": + CStatus Stat(const c_string& path, FileStatistics* stat) + + cdef cppclass FileOutputStream(COutputStream): + @staticmethod + CResult[shared_ptr[COutputStream]] Open(const c_string& path) + + int file_descriptor() + + cdef cppclass ReadableFile(CRandomAccessFile): + @staticmethod + CResult[shared_ptr[ReadableFile]] Open(const c_string& path) + + @staticmethod + CResult[shared_ptr[ReadableFile]] Open(const c_string& path, + CMemoryPool* memory_pool) + + int file_descriptor() + + cdef cppclass CMemoryMappedFile \ + " arrow::io::MemoryMappedFile"(ReadWriteFileInterface): + + @staticmethod + CResult[shared_ptr[CMemoryMappedFile]] Create(const c_string& path, + int64_t size) + + @staticmethod + CResult[shared_ptr[CMemoryMappedFile]] Open(const c_string& path, + FileMode mode) + + CStatus Resize(int64_t size) + + int file_descriptor() + + cdef cppclass CCompressedInputStream \ + " arrow::io::CompressedInputStream"(CInputStream): + @staticmethod + CResult[shared_ptr[CCompressedInputStream]] Make( + CCodec* codec, shared_ptr[CInputStream] raw) + + cdef cppclass CCompressedOutputStream \ + " arrow::io::CompressedOutputStream"(COutputStream): + @staticmethod + CResult[shared_ptr[CCompressedOutputStream]] Make( + CCodec* codec, shared_ptr[COutputStream] raw) + + cdef cppclass CBufferedInputStream \ + " arrow::io::BufferedInputStream"(CInputStream): + + @staticmethod + CResult[shared_ptr[CBufferedInputStream]] Create( + int64_t buffer_size, CMemoryPool* pool, + shared_ptr[CInputStream] raw) + + CResult[shared_ptr[CInputStream]] Detach() + + cdef cppclass CBufferedOutputStream \ + " arrow::io::BufferedOutputStream"(COutputStream): + + @staticmethod + CResult[shared_ptr[CBufferedOutputStream]] Create( + int64_t buffer_size, CMemoryPool* pool, + shared_ptr[COutputStream] raw) + + CResult[shared_ptr[COutputStream]] Detach() + + cdef cppclass CTransformInputStreamVTable \ + "arrow::py::TransformInputStreamVTable": + CTransformInputStreamVTable() + function[CallbackTransform] transform + + shared_ptr[CInputStream] MakeTransformInputStream \ + "arrow::py::MakeTransformInputStream"( + shared_ptr[CInputStream] wrapped, CTransformInputStreamVTable vtable, + object method_arg) + + # ---------------------------------------------------------------------- + # HDFS + + CStatus HaveLibHdfs() + CStatus HaveLibHdfs3() + + cdef enum HdfsDriver" arrow::io::HdfsDriver": + HdfsDriver_LIBHDFS" arrow::io::HdfsDriver::LIBHDFS" + HdfsDriver_LIBHDFS3" arrow::io::HdfsDriver::LIBHDFS3" + + cdef cppclass HdfsConnectionConfig: + c_string host + int port + c_string user + c_string kerb_ticket + unordered_map[c_string, c_string] extra_conf + HdfsDriver driver + + cdef cppclass HdfsPathInfo: + ObjectType kind + c_string name + c_string owner + c_string group + int32_t last_modified_time + int32_t last_access_time + int64_t size + int16_t replication + int64_t block_size + int16_t permissions + + cdef cppclass HdfsReadableFile(CRandomAccessFile): + pass + + cdef cppclass HdfsOutputStream(COutputStream): + pass + + cdef cppclass CIOHadoopFileSystem \ + "arrow::io::HadoopFileSystem"(CIOFileSystem): + @staticmethod + CStatus Connect(const HdfsConnectionConfig* config, + shared_ptr[CIOHadoopFileSystem]* client) + + CStatus MakeDirectory(const c_string& path) + + CStatus Delete(const c_string& path, c_bool recursive) + + CStatus Disconnect() + + c_bool Exists(const c_string& path) + + CStatus Chmod(const c_string& path, int mode) + CStatus Chown(const c_string& path, const char* owner, + const char* group) + + CStatus GetCapacity(int64_t* nbytes) + CStatus GetUsed(int64_t* nbytes) + + CStatus ListDirectory(const c_string& path, + vector[HdfsPathInfo]* listing) + + CStatus GetPathInfo(const c_string& path, HdfsPathInfo* info) + + CStatus Rename(const c_string& src, const c_string& dst) + + CStatus OpenReadable(const c_string& path, + shared_ptr[HdfsReadableFile]* handle) + + CStatus OpenWritable(const c_string& path, c_bool append, + int32_t buffer_size, int16_t replication, + int64_t default_block_size, + shared_ptr[HdfsOutputStream]* handle) + + cdef cppclass CBufferReader \ + " arrow::io::BufferReader"(CRandomAccessFile): + CBufferReader(const shared_ptr[CBuffer]& buffer) + CBufferReader(const uint8_t* data, int64_t nbytes) + + cdef cppclass CBufferOutputStream \ + " arrow::io::BufferOutputStream"(COutputStream): + CBufferOutputStream(const shared_ptr[CResizableBuffer]& buffer) + + cdef cppclass CMockOutputStream \ + " arrow::io::MockOutputStream"(COutputStream): + CMockOutputStream() + int64_t GetExtentBytesWritten() + + cdef cppclass CFixedSizeBufferWriter \ + " arrow::io::FixedSizeBufferWriter"(WritableFile): + CFixedSizeBufferWriter(const shared_ptr[CBuffer]& buffer) + + void set_memcopy_threads(int num_threads) + void set_memcopy_blocksize(int64_t blocksize) + void set_memcopy_threshold(int64_t threshold) + + +cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil: + cdef enum MessageType" arrow::ipc::MessageType": + MessageType_SCHEMA" arrow::ipc::MessageType::SCHEMA" + MessageType_RECORD_BATCH" arrow::ipc::MessageType::RECORD_BATCH" + MessageType_DICTIONARY_BATCH \ + " arrow::ipc::MessageType::DICTIONARY_BATCH" + + # TODO: use "cpdef enum class" to automatically get a Python wrapper? + # See + # https://github.com/cython/cython/commit/2c7c22f51405299a4e247f78edf52957d30cf71d#diff-61c1365c0f761a8137754bb3a73bfbf7 + ctypedef enum CMetadataVersion" arrow::ipc::MetadataVersion": + CMetadataVersion_V1" arrow::ipc::MetadataVersion::V1" + CMetadataVersion_V2" arrow::ipc::MetadataVersion::V2" + CMetadataVersion_V3" arrow::ipc::MetadataVersion::V3" + CMetadataVersion_V4" arrow::ipc::MetadataVersion::V4" + CMetadataVersion_V5" arrow::ipc::MetadataVersion::V5" + + cdef cppclass CIpcWriteOptions" arrow::ipc::IpcWriteOptions": + c_bool allow_64bit + int max_recursion_depth + int32_t alignment + c_bool write_legacy_ipc_format + CMemoryPool* memory_pool + CMetadataVersion metadata_version + shared_ptr[CCodec] codec + c_bool use_threads + c_bool emit_dictionary_deltas + + @staticmethod + CIpcWriteOptions Defaults() + + cdef cppclass CIpcReadOptions" arrow::ipc::IpcReadOptions": + int max_recursion_depth + CMemoryPool* memory_pool + shared_ptr[unordered_set[int]] included_fields + + @staticmethod + CIpcReadOptions Defaults() + + cdef cppclass CIpcWriteStats" arrow::ipc::WriteStats": + int64_t num_messages + int64_t num_record_batches + int64_t num_dictionary_batches + int64_t num_dictionary_deltas + int64_t num_replaced_dictionaries + + cdef cppclass CIpcReadStats" arrow::ipc::ReadStats": + int64_t num_messages + int64_t num_record_batches + int64_t num_dictionary_batches + int64_t num_dictionary_deltas + int64_t num_replaced_dictionaries + + cdef cppclass CDictionaryMemo" arrow::ipc::DictionaryMemo": + pass + + cdef cppclass CIpcPayload" arrow::ipc::IpcPayload": + MessageType type + shared_ptr[CBuffer] metadata + vector[shared_ptr[CBuffer]] body_buffers + int64_t body_length + + cdef cppclass CMessage" arrow::ipc::Message": + CResult[unique_ptr[CMessage]] Open(shared_ptr[CBuffer] metadata, + shared_ptr[CBuffer] body) + + shared_ptr[CBuffer] body() + + c_bool Equals(const CMessage& other) + + shared_ptr[CBuffer] metadata() + CMetadataVersion metadata_version() + MessageType type() + + CStatus SerializeTo(COutputStream* stream, + const CIpcWriteOptions& options, + int64_t* output_length) + + c_string FormatMessageType(MessageType type) + + cdef cppclass CMessageReader" arrow::ipc::MessageReader": + @staticmethod + unique_ptr[CMessageReader] Open(const shared_ptr[CInputStream]& stream) + + CResult[unique_ptr[CMessage]] ReadNextMessage() + + cdef cppclass CRecordBatchWriter" arrow::ipc::RecordBatchWriter": + CStatus Close() + CStatus WriteRecordBatch(const CRecordBatch& batch) + CStatus WriteTable(const CTable& table, int64_t max_chunksize) + + CIpcWriteStats stats() + + cdef cppclass CRecordBatchStreamReader \ + " arrow::ipc::RecordBatchStreamReader"(CRecordBatchReader): + @staticmethod + CResult[shared_ptr[CRecordBatchReader]] Open( + const shared_ptr[CInputStream], const CIpcReadOptions&) + + @staticmethod + CResult[shared_ptr[CRecordBatchReader]] Open2" Open"( + unique_ptr[CMessageReader] message_reader, + const CIpcReadOptions& options) + + CIpcReadStats stats() + + cdef cppclass CRecordBatchFileReader \ + " arrow::ipc::RecordBatchFileReader": + @staticmethod + CResult[shared_ptr[CRecordBatchFileReader]] Open( + CRandomAccessFile* file, + const CIpcReadOptions& options) + + @staticmethod + CResult[shared_ptr[CRecordBatchFileReader]] Open2" Open"( + CRandomAccessFile* file, int64_t footer_offset, + const CIpcReadOptions& options) + + shared_ptr[CSchema] schema() + + int num_record_batches() + + CResult[shared_ptr[CRecordBatch]] ReadRecordBatch(int i) + + CIpcReadStats stats() + + CResult[shared_ptr[CRecordBatchWriter]] MakeStreamWriter( + shared_ptr[COutputStream] sink, const shared_ptr[CSchema]& schema, + CIpcWriteOptions& options) + + CResult[shared_ptr[CRecordBatchWriter]] MakeFileWriter( + shared_ptr[COutputStream] sink, const shared_ptr[CSchema]& schema, + CIpcWriteOptions& options) + + CResult[unique_ptr[CMessage]] ReadMessage(CInputStream* stream, + CMemoryPool* pool) + + CStatus GetRecordBatchSize(const CRecordBatch& batch, int64_t* size) + CStatus GetTensorSize(const CTensor& tensor, int64_t* size) + + CStatus WriteTensor(const CTensor& tensor, COutputStream* dst, + int32_t* metadata_length, + int64_t* body_length) + + CResult[shared_ptr[CTensor]] ReadTensor(CInputStream* stream) + + CResult[shared_ptr[CRecordBatch]] ReadRecordBatch( + const CMessage& message, const shared_ptr[CSchema]& schema, + CDictionaryMemo* dictionary_memo, + const CIpcReadOptions& options) + + CResult[shared_ptr[CBuffer]] SerializeSchema( + const CSchema& schema, CMemoryPool* pool) + + CResult[shared_ptr[CBuffer]] SerializeRecordBatch( + const CRecordBatch& schema, const CIpcWriteOptions& options) + + CResult[shared_ptr[CSchema]] ReadSchema(CInputStream* stream, + CDictionaryMemo* dictionary_memo) + + CResult[shared_ptr[CRecordBatch]] ReadRecordBatch( + const shared_ptr[CSchema]& schema, + CDictionaryMemo* dictionary_memo, + const CIpcReadOptions& options, + CInputStream* stream) + + CStatus AlignStream(CInputStream* stream, int64_t alignment) + CStatus AlignStream(COutputStream* stream, int64_t alignment) + + cdef CStatus GetRecordBatchPayload \ + " arrow::ipc::GetRecordBatchPayload"( + const CRecordBatch& batch, + const CIpcWriteOptions& options, + CIpcPayload* out) + + +cdef extern from "arrow/util/value_parsing.h" namespace "arrow" nogil: + cdef cppclass CTimestampParser" arrow::TimestampParser": + const char* kind() const + const char* format() const + + @staticmethod + shared_ptr[CTimestampParser] MakeStrptime(c_string format) + + @staticmethod + shared_ptr[CTimestampParser] MakeISO8601() + + +cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil: + + cdef cppclass CCSVParseOptions" arrow::csv::ParseOptions": + unsigned char delimiter + c_bool quoting + unsigned char quote_char + c_bool double_quote + c_bool escaping + unsigned char escape_char + c_bool newlines_in_values + c_bool ignore_empty_lines + + CCSVParseOptions() + CCSVParseOptions(CCSVParseOptions&&) + + @staticmethod + CCSVParseOptions Defaults() + + CStatus Validate() + + cdef cppclass CCSVConvertOptions" arrow::csv::ConvertOptions": + c_bool check_utf8 + unordered_map[c_string, shared_ptr[CDataType]] column_types + vector[c_string] null_values + vector[c_string] true_values + vector[c_string] false_values + c_bool strings_can_be_null + c_bool quoted_strings_can_be_null + vector[shared_ptr[CTimestampParser]] timestamp_parsers + + c_bool auto_dict_encode + int32_t auto_dict_max_cardinality + unsigned char decimal_point + + vector[c_string] include_columns + c_bool include_missing_columns + + CCSVConvertOptions() + CCSVConvertOptions(CCSVConvertOptions&&) + + @staticmethod + CCSVConvertOptions Defaults() + + CStatus Validate() + + cdef cppclass CCSVReadOptions" arrow::csv::ReadOptions": + c_bool use_threads + int32_t block_size + int32_t skip_rows + int32_t skip_rows_after_names + vector[c_string] column_names + c_bool autogenerate_column_names + + CCSVReadOptions() + CCSVReadOptions(CCSVReadOptions&&) + + @staticmethod + CCSVReadOptions Defaults() + + CStatus Validate() + + cdef cppclass CCSVWriteOptions" arrow::csv::WriteOptions": + c_bool include_header + int32_t batch_size + CIOContext io_context + + CCSVWriteOptions() + CCSVWriteOptions(CCSVWriteOptions&&) + + @staticmethod + CCSVWriteOptions Defaults() + + CStatus Validate() + + cdef cppclass CCSVReader" arrow::csv::TableReader": + @staticmethod + CResult[shared_ptr[CCSVReader]] Make( + CIOContext, shared_ptr[CInputStream], + CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions) + + CResult[shared_ptr[CTable]] Read() + + cdef cppclass CCSVStreamingReader" arrow::csv::StreamingReader"( + CRecordBatchReader): + @staticmethod + CResult[shared_ptr[CCSVStreamingReader]] Make( + CIOContext, shared_ptr[CInputStream], + CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions) + + cdef CStatus WriteCSV(CTable&, CCSVWriteOptions& options, COutputStream*) + cdef CStatus WriteCSV( + CRecordBatch&, CCSVWriteOptions& options, COutputStream*) + cdef CResult[shared_ptr[CRecordBatchWriter]] MakeCSVWriter( + shared_ptr[COutputStream], shared_ptr[CSchema], + CCSVWriteOptions& options) + + +cdef extern from "arrow/json/options.h" nogil: + + ctypedef enum CUnexpectedFieldBehavior \ + "arrow::json::UnexpectedFieldBehavior": + CUnexpectedFieldBehavior_Ignore \ + "arrow::json::UnexpectedFieldBehavior::Ignore" + CUnexpectedFieldBehavior_Error \ + "arrow::json::UnexpectedFieldBehavior::Error" + CUnexpectedFieldBehavior_InferType \ + "arrow::json::UnexpectedFieldBehavior::InferType" + + cdef cppclass CJSONReadOptions" arrow::json::ReadOptions": + c_bool use_threads + int32_t block_size + + @staticmethod + CJSONReadOptions Defaults() + + cdef cppclass CJSONParseOptions" arrow::json::ParseOptions": + shared_ptr[CSchema] explicit_schema + c_bool newlines_in_values + CUnexpectedFieldBehavior unexpected_field_behavior + + @staticmethod + CJSONParseOptions Defaults() + + +cdef extern from "arrow/json/reader.h" namespace "arrow::json" nogil: + + cdef cppclass CJSONReader" arrow::json::TableReader": + @staticmethod + CResult[shared_ptr[CJSONReader]] Make( + CMemoryPool*, shared_ptr[CInputStream], + CJSONReadOptions, CJSONParseOptions) + + CResult[shared_ptr[CTable]] Read() + + +cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: + + cdef cppclass CExecContext" arrow::compute::ExecContext": + CExecContext() + CExecContext(CMemoryPool* pool) + + cdef cppclass CKernelSignature" arrow::compute::KernelSignature": + c_string ToString() const + + cdef cppclass CKernel" arrow::compute::Kernel": + shared_ptr[CKernelSignature] signature + + cdef cppclass CArrayKernel" arrow::compute::ArrayKernel"(CKernel): + pass + + cdef cppclass CScalarKernel" arrow::compute::ScalarKernel"(CArrayKernel): + pass + + cdef cppclass CVectorKernel" arrow::compute::VectorKernel"(CArrayKernel): + pass + + cdef cppclass CScalarAggregateKernel \ + " arrow::compute::ScalarAggregateKernel"(CKernel): + pass + + cdef cppclass CHashAggregateKernel \ + " arrow::compute::HashAggregateKernel"(CKernel): + pass + + cdef cppclass CArity" arrow::compute::Arity": + int num_args + c_bool is_varargs + + cdef enum FunctionKind" arrow::compute::Function::Kind": + FunctionKind_SCALAR" arrow::compute::Function::SCALAR" + FunctionKind_VECTOR" arrow::compute::Function::VECTOR" + FunctionKind_SCALAR_AGGREGATE \ + " arrow::compute::Function::SCALAR_AGGREGATE" + FunctionKind_HASH_AGGREGATE \ + " arrow::compute::Function::HASH_AGGREGATE" + FunctionKind_META \ + " arrow::compute::Function::META" + + cdef cppclass CFunctionDoc" arrow::compute::FunctionDoc": + c_string summary + c_string description + vector[c_string] arg_names + c_string options_class + + cdef cppclass CFunctionOptionsType" arrow::compute::FunctionOptionsType": + const char* type_name() const + + cdef cppclass CFunctionOptions" arrow::compute::FunctionOptions": + const CFunctionOptionsType* options_type() const + const char* type_name() const + c_bool Equals(const CFunctionOptions& other) const + c_string ToString() const + unique_ptr[CFunctionOptions] Copy() const + CResult[shared_ptr[CBuffer]] Serialize() const + + @staticmethod + CResult[unique_ptr[CFunctionOptions]] Deserialize( + const c_string& type_name, const CBuffer& buffer) + + cdef cppclass CFunction" arrow::compute::Function": + const c_string& name() const + FunctionKind kind() const + const CArity& arity() const + const CFunctionDoc& doc() const + int num_kernels() const + CResult[CDatum] Execute(const vector[CDatum]& args, + const CFunctionOptions* options, + CExecContext* ctx) const + + cdef cppclass CScalarFunction" arrow::compute::ScalarFunction"(CFunction): + vector[const CScalarKernel*] kernels() const + + cdef cppclass CVectorFunction" arrow::compute::VectorFunction"(CFunction): + vector[const CVectorKernel*] kernels() const + + cdef cppclass CScalarAggregateFunction \ + " arrow::compute::ScalarAggregateFunction"(CFunction): + vector[const CScalarAggregateKernel*] kernels() const + + cdef cppclass CHashAggregateFunction \ + " arrow::compute::HashAggregateFunction"(CFunction): + vector[const CHashAggregateKernel*] kernels() const + + cdef cppclass CMetaFunction" arrow::compute::MetaFunction"(CFunction): + pass + + cdef cppclass CFunctionRegistry" arrow::compute::FunctionRegistry": + CResult[shared_ptr[CFunction]] GetFunction( + const c_string& name) const + vector[c_string] GetFunctionNames() const + int num_functions() const + + CFunctionRegistry* GetFunctionRegistry() + + cdef cppclass CElementWiseAggregateOptions \ + "arrow::compute::ElementWiseAggregateOptions"(CFunctionOptions): + CElementWiseAggregateOptions(c_bool skip_nulls) + c_bool skip_nulls + + ctypedef enum CRoundMode \ + "arrow::compute::RoundMode": + CRoundMode_DOWN \ + "arrow::compute::RoundMode::DOWN" + CRoundMode_UP \ + "arrow::compute::RoundMode::UP" + CRoundMode_TOWARDS_ZERO \ + "arrow::compute::RoundMode::TOWARDS_ZERO" + CRoundMode_TOWARDS_INFINITY \ + "arrow::compute::RoundMode::TOWARDS_INFINITY" + CRoundMode_HALF_DOWN \ + "arrow::compute::RoundMode::HALF_DOWN" + CRoundMode_HALF_UP \ + "arrow::compute::RoundMode::HALF_UP" + CRoundMode_HALF_TOWARDS_ZERO \ + "arrow::compute::RoundMode::HALF_TOWARDS_ZERO" + CRoundMode_HALF_TOWARDS_INFINITY \ + "arrow::compute::RoundMode::HALF_TOWARDS_INFINITY" + CRoundMode_HALF_TO_EVEN \ + "arrow::compute::RoundMode::HALF_TO_EVEN" + CRoundMode_HALF_TO_ODD \ + "arrow::compute::RoundMode::HALF_TO_ODD" + + cdef cppclass CRoundOptions \ + "arrow::compute::RoundOptions"(CFunctionOptions): + CRoundOptions(int64_t ndigits, CRoundMode round_mode) + int64_t ndigits + CRoundMode round_mode + + cdef cppclass CRoundToMultipleOptions \ + "arrow::compute::RoundToMultipleOptions"(CFunctionOptions): + CRoundToMultipleOptions(double multiple, CRoundMode round_mode) + double multiple + CRoundMode round_mode + + cdef enum CJoinNullHandlingBehavior \ + "arrow::compute::JoinOptions::NullHandlingBehavior": + CJoinNullHandlingBehavior_EMIT_NULL \ + "arrow::compute::JoinOptions::EMIT_NULL" + CJoinNullHandlingBehavior_SKIP \ + "arrow::compute::JoinOptions::SKIP" + CJoinNullHandlingBehavior_REPLACE \ + "arrow::compute::JoinOptions::REPLACE" + + cdef cppclass CJoinOptions \ + "arrow::compute::JoinOptions"(CFunctionOptions): + CJoinOptions(CJoinNullHandlingBehavior null_handling, + c_string null_replacement) + CJoinNullHandlingBehavior null_handling + c_string null_replacement + + cdef cppclass CMatchSubstringOptions \ + "arrow::compute::MatchSubstringOptions"(CFunctionOptions): + CMatchSubstringOptions(c_string pattern, c_bool ignore_case) + c_string pattern + c_bool ignore_case + + cdef cppclass CTrimOptions \ + "arrow::compute::TrimOptions"(CFunctionOptions): + CTrimOptions(c_string characters) + c_string characters + + cdef cppclass CPadOptions \ + "arrow::compute::PadOptions"(CFunctionOptions): + CPadOptions(int64_t width, c_string padding) + int64_t width + c_string padding + + cdef cppclass CSliceOptions \ + "arrow::compute::SliceOptions"(CFunctionOptions): + CSliceOptions(int64_t start, int64_t stop, int64_t step) + int64_t start + int64_t stop + int64_t step + + cdef cppclass CSplitOptions \ + "arrow::compute::SplitOptions"(CFunctionOptions): + CSplitOptions(int64_t max_splits, c_bool reverse) + int64_t max_splits + c_bool reverse + + cdef cppclass CSplitPatternOptions \ + "arrow::compute::SplitPatternOptions"(CFunctionOptions): + CSplitPatternOptions(c_string pattern, int64_t max_splits, + c_bool reverse) + int64_t max_splits + c_bool reverse + c_string pattern + + cdef cppclass CReplaceSliceOptions \ + "arrow::compute::ReplaceSliceOptions"(CFunctionOptions): + CReplaceSliceOptions(int64_t start, int64_t stop, c_string replacement) + int64_t start + int64_t stop + c_string replacement + + cdef cppclass CReplaceSubstringOptions \ + "arrow::compute::ReplaceSubstringOptions"(CFunctionOptions): + CReplaceSubstringOptions(c_string pattern, c_string replacement, + int64_t max_replacements) + c_string pattern + c_string replacement + int64_t max_replacements + + cdef cppclass CExtractRegexOptions \ + "arrow::compute::ExtractRegexOptions"(CFunctionOptions): + CExtractRegexOptions(c_string pattern) + c_string pattern + + cdef cppclass CCastOptions" arrow::compute::CastOptions"(CFunctionOptions): + CCastOptions() + CCastOptions(c_bool safe) + CCastOptions(CCastOptions&& options) + + @staticmethod + CCastOptions Safe() + + @staticmethod + CCastOptions Unsafe() + shared_ptr[CDataType] to_type + c_bool allow_int_overflow + c_bool allow_time_truncate + c_bool allow_time_overflow + c_bool allow_decimal_truncate + c_bool allow_float_truncate + c_bool allow_invalid_utf8 + + cdef enum CFilterNullSelectionBehavior \ + "arrow::compute::FilterOptions::NullSelectionBehavior": + CFilterNullSelectionBehavior_DROP \ + "arrow::compute::FilterOptions::DROP" + CFilterNullSelectionBehavior_EMIT_NULL \ + "arrow::compute::FilterOptions::EMIT_NULL" + + cdef cppclass CFilterOptions \ + " arrow::compute::FilterOptions"(CFunctionOptions): + CFilterOptions() + CFilterOptions(CFilterNullSelectionBehavior null_selection_behavior) + CFilterNullSelectionBehavior null_selection_behavior + + cdef enum CDictionaryEncodeNullEncodingBehavior \ + "arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior": + CDictionaryEncodeNullEncodingBehavior_ENCODE \ + "arrow::compute::DictionaryEncodeOptions::ENCODE" + CDictionaryEncodeNullEncodingBehavior_MASK \ + "arrow::compute::DictionaryEncodeOptions::MASK" + + cdef cppclass CDictionaryEncodeOptions \ + "arrow::compute::DictionaryEncodeOptions"(CFunctionOptions): + CDictionaryEncodeOptions( + CDictionaryEncodeNullEncodingBehavior null_encoding) + CDictionaryEncodeNullEncodingBehavior null_encoding + + cdef cppclass CTakeOptions \ + " arrow::compute::TakeOptions"(CFunctionOptions): + CTakeOptions(c_bool boundscheck) + c_bool boundscheck + + cdef cppclass CStrptimeOptions \ + "arrow::compute::StrptimeOptions"(CFunctionOptions): + CStrptimeOptions(c_string format, TimeUnit unit) + c_string format + TimeUnit unit + + cdef cppclass CStrftimeOptions \ + "arrow::compute::StrftimeOptions"(CFunctionOptions): + CStrftimeOptions(c_string format, c_string locale) + c_string format + c_string locale + + cdef cppclass CDayOfWeekOptions \ + "arrow::compute::DayOfWeekOptions"(CFunctionOptions): + CDayOfWeekOptions(c_bool count_from_zero, uint32_t week_start) + c_bool count_from_zero + uint32_t week_start + + cdef enum CAssumeTimezoneAmbiguous \ + "arrow::compute::AssumeTimezoneOptions::Ambiguous": + CAssumeTimezoneAmbiguous_AMBIGUOUS_RAISE \ + "arrow::compute::AssumeTimezoneOptions::AMBIGUOUS_RAISE" + CAssumeTimezoneAmbiguous_AMBIGUOUS_EARLIEST \ + "arrow::compute::AssumeTimezoneOptions::AMBIGUOUS_EARLIEST" + CAssumeTimezoneAmbiguous_AMBIGUOUS_LATEST \ + "arrow::compute::AssumeTimezoneOptions::AMBIGUOUS_LATEST" + + cdef enum CAssumeTimezoneNonexistent \ + "arrow::compute::AssumeTimezoneOptions::Nonexistent": + CAssumeTimezoneNonexistent_NONEXISTENT_RAISE \ + "arrow::compute::AssumeTimezoneOptions::NONEXISTENT_RAISE" + CAssumeTimezoneNonexistent_NONEXISTENT_EARLIEST \ + "arrow::compute::AssumeTimezoneOptions::NONEXISTENT_EARLIEST" + CAssumeTimezoneNonexistent_NONEXISTENT_LATEST \ + "arrow::compute::AssumeTimezoneOptions::NONEXISTENT_LATEST" + + cdef cppclass CAssumeTimezoneOptions \ + "arrow::compute::AssumeTimezoneOptions"(CFunctionOptions): + CAssumeTimezoneOptions(c_string timezone, + CAssumeTimezoneAmbiguous ambiguous, + CAssumeTimezoneNonexistent nonexistent) + c_string timezone + CAssumeTimezoneAmbiguous ambiguous + CAssumeTimezoneNonexistent nonexistent + + cdef cppclass CWeekOptions \ + "arrow::compute::WeekOptions"(CFunctionOptions): + CWeekOptions(c_bool week_starts_monday, c_bool count_from_zero, + c_bool first_week_is_fully_in_year) + c_bool week_starts_monday + c_bool count_from_zero + c_bool first_week_is_fully_in_year + + cdef cppclass CNullOptions \ + "arrow::compute::NullOptions"(CFunctionOptions): + CNullOptions(c_bool nan_is_null) + c_bool nan_is_null + + cdef cppclass CVarianceOptions \ + "arrow::compute::VarianceOptions"(CFunctionOptions): + CVarianceOptions(int ddof, c_bool skip_nulls, uint32_t min_count) + int ddof + c_bool skip_nulls + uint32_t min_count + + cdef cppclass CScalarAggregateOptions \ + "arrow::compute::ScalarAggregateOptions"(CFunctionOptions): + CScalarAggregateOptions(c_bool skip_nulls, uint32_t min_count) + c_bool skip_nulls + uint32_t min_count + + cdef enum CCountMode "arrow::compute::CountOptions::CountMode": + CCountMode_ONLY_VALID "arrow::compute::CountOptions::ONLY_VALID" + CCountMode_ONLY_NULL "arrow::compute::CountOptions::ONLY_NULL" + CCountMode_ALL "arrow::compute::CountOptions::ALL" + + cdef cppclass CCountOptions \ + "arrow::compute::CountOptions"(CFunctionOptions): + CCountOptions(CCountMode mode) + CCountMode mode + + cdef cppclass CModeOptions \ + "arrow::compute::ModeOptions"(CFunctionOptions): + CModeOptions(int64_t n, c_bool skip_nulls, uint32_t min_count) + int64_t n + c_bool skip_nulls + uint32_t min_count + + cdef cppclass CIndexOptions \ + "arrow::compute::IndexOptions"(CFunctionOptions): + CIndexOptions(shared_ptr[CScalar] value) + shared_ptr[CScalar] value + + cdef cppclass CMakeStructOptions \ + "arrow::compute::MakeStructOptions"(CFunctionOptions): + CMakeStructOptions(vector[c_string] n, + vector[c_bool] r, + vector[shared_ptr[const CKeyValueMetadata]] m) + CMakeStructOptions(vector[c_string] n) + vector[c_string] field_names + vector[c_bool] field_nullability + vector[shared_ptr[const CKeyValueMetadata]] field_metadata + + ctypedef enum CSortOrder" arrow::compute::SortOrder": + CSortOrder_Ascending \ + "arrow::compute::SortOrder::Ascending" + CSortOrder_Descending \ + "arrow::compute::SortOrder::Descending" + + ctypedef enum CNullPlacement" arrow::compute::NullPlacement": + CNullPlacement_AtStart \ + "arrow::compute::NullPlacement::AtStart" + CNullPlacement_AtEnd \ + "arrow::compute::NullPlacement::AtEnd" + + cdef cppclass CPartitionNthOptions \ + "arrow::compute::PartitionNthOptions"(CFunctionOptions): + CPartitionNthOptions(int64_t pivot, CNullPlacement) + int64_t pivot + CNullPlacement null_placement + + cdef cppclass CArraySortOptions \ + "arrow::compute::ArraySortOptions"(CFunctionOptions): + CArraySortOptions(CSortOrder, CNullPlacement) + CSortOrder order + CNullPlacement null_placement + + cdef cppclass CSortKey" arrow::compute::SortKey": + CSortKey(c_string name, CSortOrder order) + c_string name + CSortOrder order + + cdef cppclass CSortOptions \ + "arrow::compute::SortOptions"(CFunctionOptions): + CSortOptions(vector[CSortKey] sort_keys, CNullPlacement) + vector[CSortKey] sort_keys + CNullPlacement null_placement + + cdef cppclass CSelectKOptions \ + "arrow::compute::SelectKOptions"(CFunctionOptions): + CSelectKOptions(int64_t k, vector[CSortKey] sort_keys) + int64_t k + vector[CSortKey] sort_keys + + cdef enum CQuantileInterp \ + "arrow::compute::QuantileOptions::Interpolation": + CQuantileInterp_LINEAR "arrow::compute::QuantileOptions::LINEAR" + CQuantileInterp_LOWER "arrow::compute::QuantileOptions::LOWER" + CQuantileInterp_HIGHER "arrow::compute::QuantileOptions::HIGHER" + CQuantileInterp_NEAREST "arrow::compute::QuantileOptions::NEAREST" + CQuantileInterp_MIDPOINT "arrow::compute::QuantileOptions::MIDPOINT" + + cdef cppclass CQuantileOptions \ + "arrow::compute::QuantileOptions"(CFunctionOptions): + CQuantileOptions(vector[double] q, CQuantileInterp interpolation, + c_bool skip_nulls, uint32_t min_count) + vector[double] q + CQuantileInterp interpolation + c_bool skip_nulls + uint32_t min_count + + cdef cppclass CTDigestOptions \ + "arrow::compute::TDigestOptions"(CFunctionOptions): + CTDigestOptions(vector[double] q, + uint32_t delta, uint32_t buffer_size, + c_bool skip_nulls, uint32_t min_count) + vector[double] q + uint32_t delta + uint32_t buffer_size + c_bool skip_nulls + uint32_t min_count + + cdef enum DatumType" arrow::Datum::type": + DatumType_NONE" arrow::Datum::NONE" + DatumType_SCALAR" arrow::Datum::SCALAR" + DatumType_ARRAY" arrow::Datum::ARRAY" + DatumType_CHUNKED_ARRAY" arrow::Datum::CHUNKED_ARRAY" + DatumType_RECORD_BATCH" arrow::Datum::RECORD_BATCH" + DatumType_TABLE" arrow::Datum::TABLE" + DatumType_COLLECTION" arrow::Datum::COLLECTION" + + cdef cppclass CDatum" arrow::Datum": + CDatum() + CDatum(const shared_ptr[CArray]& value) + CDatum(const shared_ptr[CChunkedArray]& value) + CDatum(const shared_ptr[CScalar]& value) + CDatum(const shared_ptr[CRecordBatch]& value) + CDatum(const shared_ptr[CTable]& value) + + DatumType kind() const + c_string ToString() const + + const shared_ptr[CArrayData]& array() const + const shared_ptr[CChunkedArray]& chunked_array() const + const shared_ptr[CRecordBatch]& record_batch() const + const shared_ptr[CTable]& table() const + const shared_ptr[CScalar]& scalar() const + + cdef cppclass CSetLookupOptions \ + "arrow::compute::SetLookupOptions"(CFunctionOptions): + CSetLookupOptions(CDatum value_set, c_bool skip_nulls) + CDatum value_set + c_bool skip_nulls + + +cdef extern from * namespace "arrow::compute": + # inlined from compute/function_internal.h to avoid exposing + # implementation details + """ + #include "arrow/compute/function.h" + namespace arrow { + namespace compute { + namespace internal { + Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions( + const Buffer& buffer); + } // namespace internal + } // namespace compute + } // namespace arrow + """ + CResult[unique_ptr[CFunctionOptions]] DeserializeFunctionOptions \ + " arrow::compute::internal::DeserializeFunctionOptions"( + const CBuffer& buffer) + + +cdef extern from "arrow/python/api.h" namespace "arrow::py": + # Requires GIL + CResult[shared_ptr[CDataType]] InferArrowType( + object obj, object mask, c_bool pandas_null_sentinels) + + +cdef extern from "arrow/python/api.h" namespace "arrow::py::internal": + object NewMonthDayNanoTupleType() + CResult[PyObject*] MonthDayNanoIntervalArrayToPyList( + const CMonthDayNanoIntervalArray& array) + CResult[PyObject*] MonthDayNanoIntervalScalarToPyObject( + const CMonthDayNanoIntervalScalar& scalar) + + +cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: + shared_ptr[CDataType] GetPrimitiveType(Type type) + + object PyHalf_FromHalf(npy_half value) + + cdef cppclass PyConversionOptions: + PyConversionOptions() + + shared_ptr[CDataType] type + int64_t size + CMemoryPool* pool + c_bool from_pandas + c_bool ignore_timezone + c_bool strict + + # TODO Some functions below are not actually "nogil" + + CResult[shared_ptr[CChunkedArray]] ConvertPySequence( + object obj, object mask, const PyConversionOptions& options, + CMemoryPool* pool) + + CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type) + + CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo, + c_bool from_pandas, + const shared_ptr[CDataType]& type, + shared_ptr[CChunkedArray]* out) + + CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo, + c_bool from_pandas, + const shared_ptr[CDataType]& type, + const CCastOptions& cast_options, + shared_ptr[CChunkedArray]* out) + + CStatus NdarrayToTensor(CMemoryPool* pool, object ao, + const vector[c_string]& dim_names, + shared_ptr[CTensor]* out) + + CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base, + PyObject** out) + + CStatus SparseCOOTensorToNdarray( + const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_coords) + + CStatus SparseCSRMatrixToNdarray( + const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_indptr, PyObject** out_indices) + + CStatus SparseCSCMatrixToNdarray( + const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_indptr, PyObject** out_indices) + + CStatus SparseCSFTensorToNdarray( + const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_indptr, PyObject** out_indices) + + CStatus NdarraysToSparseCOOTensor(CMemoryPool* pool, object data_ao, + object coords_ao, + const vector[int64_t]& shape, + const vector[c_string]& dim_names, + shared_ptr[CSparseCOOTensor]* out) + + CStatus NdarraysToSparseCSRMatrix(CMemoryPool* pool, object data_ao, + object indptr_ao, object indices_ao, + const vector[int64_t]& shape, + const vector[c_string]& dim_names, + shared_ptr[CSparseCSRMatrix]* out) + + CStatus NdarraysToSparseCSCMatrix(CMemoryPool* pool, object data_ao, + object indptr_ao, object indices_ao, + const vector[int64_t]& shape, + const vector[c_string]& dim_names, + shared_ptr[CSparseCSCMatrix]* out) + + CStatus NdarraysToSparseCSFTensor(CMemoryPool* pool, object data_ao, + object indptr_ao, object indices_ao, + const vector[int64_t]& shape, + const vector[int64_t]& axis_order, + const vector[c_string]& dim_names, + shared_ptr[CSparseCSFTensor]* out) + + CStatus TensorToSparseCOOTensor(shared_ptr[CTensor], + shared_ptr[CSparseCOOTensor]* out) + + CStatus TensorToSparseCSRMatrix(shared_ptr[CTensor], + shared_ptr[CSparseCSRMatrix]* out) + + CStatus TensorToSparseCSCMatrix(shared_ptr[CTensor], + shared_ptr[CSparseCSCMatrix]* out) + + CStatus TensorToSparseCSFTensor(shared_ptr[CTensor], + shared_ptr[CSparseCSFTensor]* out) + + CStatus ConvertArrayToPandas(const PandasOptions& options, + shared_ptr[CArray] arr, + object py_ref, PyObject** out) + + CStatus ConvertChunkedArrayToPandas(const PandasOptions& options, + shared_ptr[CChunkedArray] arr, + object py_ref, PyObject** out) + + CStatus ConvertTableToPandas(const PandasOptions& options, + shared_ptr[CTable] table, + PyObject** out) + + void c_set_default_memory_pool \ + " arrow::py::set_default_memory_pool"(CMemoryPool* pool)\ + + CMemoryPool* c_get_memory_pool \ + " arrow::py::get_memory_pool"() + + cdef cppclass PyBuffer(CBuffer): + @staticmethod + CResult[shared_ptr[CBuffer]] FromPyObject(object obj) + + cdef cppclass PyForeignBuffer(CBuffer): + @staticmethod + CStatus Make(const uint8_t* data, int64_t size, object base, + shared_ptr[CBuffer]* out) + + cdef cppclass PyReadableFile(CRandomAccessFile): + PyReadableFile(object fo) + + cdef cppclass PyOutputStream(COutputStream): + PyOutputStream(object fo) + + cdef cppclass PandasOptions: + CMemoryPool* pool + c_bool strings_to_categorical + c_bool zero_copy_only + c_bool integer_object_nulls + c_bool date_as_object + c_bool timestamp_as_object + c_bool use_threads + c_bool coerce_temporal_nanoseconds + c_bool ignore_timezone + c_bool deduplicate_objects + c_bool safe_cast + c_bool split_blocks + c_bool self_destruct + c_bool decode_dictionaries + unordered_set[c_string] categorical_columns + unordered_set[c_string] extension_columns + + cdef cppclass CSerializedPyObject" arrow::py::SerializedPyObject": + shared_ptr[CRecordBatch] batch + vector[shared_ptr[CTensor]] tensors + + CStatus WriteTo(COutputStream* dst) + CStatus GetComponents(CMemoryPool* pool, PyObject** dst) + + CStatus SerializeObject(object context, object sequence, + CSerializedPyObject* out) + + CStatus DeserializeObject(object context, + const CSerializedPyObject& obj, + PyObject* base, PyObject** out) + + CStatus ReadSerializedObject(CRandomAccessFile* src, + CSerializedPyObject* out) + + cdef cppclass SparseTensorCounts: + SparseTensorCounts() + int coo + int csr + int csc + int csf + int ndim_csf + int num_total_tensors() const + int num_total_buffers() const + + CStatus GetSerializedFromComponents( + int num_tensors, + const SparseTensorCounts& num_sparse_tensors, + int num_ndarrays, + int num_buffers, + object buffers, + CSerializedPyObject* out) + + +cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: + cdef cppclass CTimePoint "arrow::py::internal::TimePoint": + pass + + CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime) + int64_t TimePoint_to_ns(CTimePoint val) + CTimePoint TimePoint_from_s(double val) + CTimePoint TimePoint_from_ns(int64_t val) + + CResult[c_string] TzinfoToString(PyObject* pytzinfo) + CResult[PyObject*] StringToTzinfo(c_string) + + +cdef extern from "arrow/python/init.h": + int arrow_init_numpy() except -1 + + +cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py": + int import_pyarrow() except -1 + + +cdef extern from "arrow/python/common.h" namespace "arrow::py": + c_bool IsPyError(const CStatus& status) + void RestorePyError(const CStatus& status) + + +cdef extern from "arrow/python/inference.h" namespace "arrow::py": + c_bool IsPyBool(object o) + c_bool IsPyInt(object o) + c_bool IsPyFloat(object o) + + +cdef extern from "arrow/python/ipc.h" namespace "arrow::py": + cdef cppclass CPyRecordBatchReader" arrow::py::PyRecordBatchReader" \ + (CRecordBatchReader): + @staticmethod + CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CSchema], + object) + + +cdef extern from "arrow/extension_type.h" namespace "arrow": + cdef cppclass CExtensionTypeRegistry" arrow::ExtensionTypeRegistry": + @staticmethod + shared_ptr[CExtensionTypeRegistry] GetGlobalRegistry() + + cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType): + c_string extension_name() + shared_ptr[CDataType] storage_type() + + @staticmethod + shared_ptr[CArray] WrapArray(shared_ptr[CDataType] ext_type, + shared_ptr[CArray] storage) + + @staticmethod + shared_ptr[CChunkedArray] WrapArray(shared_ptr[CDataType] ext_type, + shared_ptr[CChunkedArray] storage) + + cdef cppclass CExtensionArray" arrow::ExtensionArray"(CArray): + CExtensionArray(shared_ptr[CDataType], shared_ptr[CArray] storage) + + shared_ptr[CArray] storage() + + +cdef extern from "arrow/python/extension_type.h" namespace "arrow::py": + cdef cppclass CPyExtensionType \ + " arrow::py::PyExtensionType"(CExtensionType): + @staticmethod + CStatus FromClass(const shared_ptr[CDataType] storage_type, + const c_string extension_name, object typ, + shared_ptr[CExtensionType]* out) + + @staticmethod + CStatus FromInstance(shared_ptr[CDataType] storage_type, + object inst, shared_ptr[CExtensionType]* out) + + object GetInstance() + CStatus SetInstance(object) + + c_string PyExtensionName() + CStatus RegisterPyExtensionType(shared_ptr[CDataType]) + CStatus UnregisterPyExtensionType(c_string type_name) + + +cdef extern from "arrow/python/benchmark.h" namespace "arrow::py::benchmark": + void Benchmark_PandasObjectIsNull(object lst) except * + + +cdef extern from "arrow/util/compression.h" namespace "arrow" nogil: + cdef enum CCompressionType" arrow::Compression::type": + CCompressionType_UNCOMPRESSED" arrow::Compression::UNCOMPRESSED" + CCompressionType_SNAPPY" arrow::Compression::SNAPPY" + CCompressionType_GZIP" arrow::Compression::GZIP" + CCompressionType_BROTLI" arrow::Compression::BROTLI" + CCompressionType_ZSTD" arrow::Compression::ZSTD" + CCompressionType_LZ4" arrow::Compression::LZ4" + CCompressionType_LZ4_FRAME" arrow::Compression::LZ4_FRAME" + CCompressionType_BZ2" arrow::Compression::BZ2" + + cdef cppclass CCodec" arrow::util::Codec": + @staticmethod + CResult[unique_ptr[CCodec]] Create(CCompressionType codec) + + @staticmethod + CResult[unique_ptr[CCodec]] CreateWithLevel" Create"( + CCompressionType codec, + int compression_level) + + @staticmethod + c_bool SupportsCompressionLevel(CCompressionType codec) + + @staticmethod + CResult[int] MinimumCompressionLevel(CCompressionType codec) + + @staticmethod + CResult[int] MaximumCompressionLevel(CCompressionType codec) + + @staticmethod + CResult[int] DefaultCompressionLevel(CCompressionType codec) + + @staticmethod + c_bool IsAvailable(CCompressionType codec) + + CResult[int64_t] Decompress(int64_t input_len, const uint8_t* input, + int64_t output_len, + uint8_t* output_buffer) + CResult[int64_t] Compress(int64_t input_len, const uint8_t* input, + int64_t output_buffer_len, + uint8_t* output_buffer) + c_string name() const + int compression_level() const + int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) + + +cdef extern from "arrow/util/io_util.h" namespace "arrow::internal" nogil: + int ErrnoFromStatus(CStatus status) + int WinErrorFromStatus(CStatus status) + int SignalFromStatus(CStatus status) + + CStatus SendSignal(int signum) + CStatus SendSignalToThread(int signum, uint64_t thread_id) + + +cdef extern from "arrow/util/iterator.h" namespace "arrow" nogil: + cdef cppclass CIterator" arrow::Iterator"[T]: + CResult[T] Next() + CStatus Visit[Visitor](Visitor&& visitor) + cppclass RangeIterator: + CResult[T] operator*() + RangeIterator& operator++() + c_bool operator!=(RangeIterator) const + RangeIterator begin() + RangeIterator end() + CIterator[T] MakeVectorIterator[T](vector[T] v) + +cdef extern from "arrow/util/thread_pool.h" namespace "arrow" nogil: + int GetCpuThreadPoolCapacity() + CStatus SetCpuThreadPoolCapacity(int threads) + +cdef extern from "arrow/array/concatenate.h" namespace "arrow" nogil: + CResult[shared_ptr[CArray]] Concatenate( + const vector[shared_ptr[CArray]]& arrays, + CMemoryPool* pool) + +cdef extern from "arrow/c/abi.h": + cdef struct ArrowSchema: + pass + + cdef struct ArrowArray: + pass + + cdef struct ArrowArrayStream: + pass + +cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil: + CStatus ExportType(CDataType&, ArrowSchema* out) + CResult[shared_ptr[CDataType]] ImportType(ArrowSchema*) + + CStatus ExportField(CField&, ArrowSchema* out) + CResult[shared_ptr[CField]] ImportField(ArrowSchema*) + + CStatus ExportSchema(CSchema&, ArrowSchema* out) + CResult[shared_ptr[CSchema]] ImportSchema(ArrowSchema*) + + CStatus ExportArray(CArray&, ArrowArray* out) + CStatus ExportArray(CArray&, ArrowArray* out, ArrowSchema* out_schema) + CResult[shared_ptr[CArray]] ImportArray(ArrowArray*, + shared_ptr[CDataType]) + CResult[shared_ptr[CArray]] ImportArray(ArrowArray*, ArrowSchema*) + + CStatus ExportRecordBatch(CRecordBatch&, ArrowArray* out) + CStatus ExportRecordBatch(CRecordBatch&, ArrowArray* out, + ArrowSchema* out_schema) + CResult[shared_ptr[CRecordBatch]] ImportRecordBatch(ArrowArray*, + shared_ptr[CSchema]) + CResult[shared_ptr[CRecordBatch]] ImportRecordBatch(ArrowArray*, + ArrowSchema*) + + CStatus ExportRecordBatchReader(shared_ptr[CRecordBatchReader], + ArrowArrayStream*) + CResult[shared_ptr[CRecordBatchReader]] ImportRecordBatchReader( + ArrowArrayStream*) diff --git a/src/arrow/python/pyarrow/includes/libarrow_cuda.pxd b/src/arrow/python/pyarrow/includes/libarrow_cuda.pxd new file mode 100644 index 000000000..3ac943cf9 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow_cuda.pxd @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.libarrow cimport * + +cdef extern from "arrow/gpu/cuda_api.h" namespace "arrow::cuda" nogil: + + cdef cppclass CCudaDeviceManager" arrow::cuda::CudaDeviceManager": + @staticmethod + CResult[CCudaDeviceManager*] Instance() + CResult[shared_ptr[CCudaContext]] GetContext(int gpu_number) + CResult[shared_ptr[CCudaContext]] GetSharedContext(int gpu_number, + void* handle) + CStatus AllocateHost(int device_number, int64_t nbytes, + shared_ptr[CCudaHostBuffer]* buffer) + int num_devices() const + + cdef cppclass CCudaContext" arrow::cuda::CudaContext": + CResult[shared_ptr[CCudaBuffer]] Allocate(int64_t nbytes) + CResult[shared_ptr[CCudaBuffer]] View(uint8_t* data, int64_t nbytes) + CResult[shared_ptr[CCudaBuffer]] OpenIpcBuffer( + const CCudaIpcMemHandle& ipc_handle) + CStatus Synchronize() + int64_t bytes_allocated() const + const void* handle() const + int device_number() const + CResult[uintptr_t] GetDeviceAddress(uintptr_t addr) + + cdef cppclass CCudaIpcMemHandle" arrow::cuda::CudaIpcMemHandle": + @staticmethod + CResult[shared_ptr[CCudaIpcMemHandle]] FromBuffer( + const void* opaque_handle) + CResult[shared_ptr[CBuffer]] Serialize(CMemoryPool* pool) const + + cdef cppclass CCudaBuffer" arrow::cuda::CudaBuffer"(CBuffer): + CCudaBuffer(uint8_t* data, int64_t size, + const shared_ptr[CCudaContext]& context, + c_bool own_data=false, c_bool is_ipc=false) + CCudaBuffer(const shared_ptr[CCudaBuffer]& parent, + const int64_t offset, const int64_t size) + + @staticmethod + CResult[shared_ptr[CCudaBuffer]] FromBuffer(shared_ptr[CBuffer] buf) + + CStatus CopyToHost(const int64_t position, const int64_t nbytes, + void* out) const + CStatus CopyFromHost(const int64_t position, const void* data, + int64_t nbytes) + CStatus CopyFromDevice(const int64_t position, const void* data, + int64_t nbytes) + CStatus CopyFromAnotherDevice(const shared_ptr[CCudaContext]& src_ctx, + const int64_t position, const void* data, + int64_t nbytes) + CResult[shared_ptr[CCudaIpcMemHandle]] ExportForIpc() + shared_ptr[CCudaContext] context() const + + cdef cppclass \ + CCudaHostBuffer" arrow::cuda::CudaHostBuffer"(CMutableBuffer): + pass + + cdef cppclass \ + CCudaBufferReader" arrow::cuda::CudaBufferReader"(CBufferReader): + CCudaBufferReader(const shared_ptr[CBuffer]& buffer) + CResult[int64_t] Read(int64_t nbytes, void* buffer) + CResult[shared_ptr[CBuffer]] Read(int64_t nbytes) + + cdef cppclass \ + CCudaBufferWriter" arrow::cuda::CudaBufferWriter"(WritableFile): + CCudaBufferWriter(const shared_ptr[CCudaBuffer]& buffer) + CStatus Close() + CStatus Write(const void* data, int64_t nbytes) + CStatus WriteAt(int64_t position, const void* data, int64_t nbytes) + CStatus SetBufferSize(const int64_t buffer_size) + int64_t buffer_size() + int64_t num_bytes_buffered() const + + CResult[shared_ptr[CCudaHostBuffer]] AllocateCudaHostBuffer( + int device_number, const int64_t size) + + # Cuda prefix is added to avoid picking up arrow::cuda functions + # from arrow namespace. + CResult[shared_ptr[CCudaBuffer]] \ + CudaSerializeRecordBatch" arrow::cuda::SerializeRecordBatch"\ + (const CRecordBatch& batch, + CCudaContext* ctx) + CResult[shared_ptr[CRecordBatch]] \ + CudaReadRecordBatch" arrow::cuda::ReadRecordBatch"\ + (const shared_ptr[CSchema]& schema, + CDictionaryMemo* dictionary_memo, + const shared_ptr[CCudaBuffer]& buffer, + CMemoryPool* pool) diff --git a/src/arrow/python/pyarrow/includes/libarrow_dataset.pxd b/src/arrow/python/pyarrow/includes/libarrow_dataset.pxd new file mode 100644 index 000000000..abc79fea8 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow_dataset.pxd @@ -0,0 +1,478 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from libcpp.unordered_map cimport unordered_map + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * +from pyarrow.includes.libarrow_fs cimport * +from pyarrow._parquet cimport * + + +cdef extern from "arrow/api.h" namespace "arrow" nogil: + + cdef cppclass CRecordBatchIterator "arrow::RecordBatchIterator"( + CIterator[shared_ptr[CRecordBatch]]): + pass + + +cdef extern from * namespace "arrow::compute": + # inlined from expression_internal.h to avoid + # proliferation of #include <unordered_map> + """ + #include <unordered_map> + + #include "arrow/type.h" + #include "arrow/datum.h" + + namespace arrow { + namespace compute { + struct KnownFieldValues { + std::unordered_map<FieldRef, Datum, FieldRef::Hash> map; + }; + } // namespace compute + } // namespace arrow + """ + cdef struct CKnownFieldValues "arrow::compute::KnownFieldValues": + unordered_map[CFieldRef, CDatum, CFieldRefHash] map + +cdef extern from "arrow/compute/exec/expression.h" \ + namespace "arrow::compute" nogil: + + cdef cppclass CExpression "arrow::compute::Expression": + c_bool Equals(const CExpression& other) const + c_string ToString() const + CResult[CExpression] Bind(const CSchema&) + + cdef CExpression CMakeScalarExpression \ + "arrow::compute::literal"(shared_ptr[CScalar] value) + + cdef CExpression CMakeFieldExpression \ + "arrow::compute::field_ref"(c_string name) + + cdef CExpression CMakeCallExpression \ + "arrow::compute::call"(c_string function, + vector[CExpression] arguments, + shared_ptr[CFunctionOptions] options) + + cdef CResult[shared_ptr[CBuffer]] CSerializeExpression \ + "arrow::compute::Serialize"(const CExpression&) + + cdef CResult[CExpression] CDeserializeExpression \ + "arrow::compute::Deserialize"(shared_ptr[CBuffer]) + + cdef CResult[CKnownFieldValues] \ + CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"( + const CExpression& partition_expression) + +ctypedef CStatus cb_writer_finish_internal(CFileWriter*) +ctypedef void cb_writer_finish(dict, CFileWriter*) + +cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: + + cdef enum ExistingDataBehavior" arrow::dataset::ExistingDataBehavior": + ExistingDataBehavior_DELETE_MATCHING" \ + arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions" + ExistingDataBehavior_OVERWRITE_OR_IGNORE" \ + arrow::dataset::ExistingDataBehavior::kOverwriteOrIgnore" + ExistingDataBehavior_ERROR" \ + arrow::dataset::ExistingDataBehavior::kError" + + cdef cppclass CScanOptions "arrow::dataset::ScanOptions": + @staticmethod + shared_ptr[CScanOptions] Make(shared_ptr[CSchema] schema) + + shared_ptr[CSchema] dataset_schema + shared_ptr[CSchema] projected_schema + + cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions": + c_string type_name() const + + ctypedef CIterator[shared_ptr[CScanTask]] CScanTaskIterator \ + "arrow::dataset::ScanTaskIterator" + + cdef cppclass CScanTask" arrow::dataset::ScanTask": + CResult[CRecordBatchIterator] Execute() + + cdef cppclass CFragment "arrow::dataset::Fragment": + CResult[shared_ptr[CSchema]] ReadPhysicalSchema() + CResult[CScanTaskIterator] Scan(shared_ptr[CScanOptions] options) + c_bool splittable() const + c_string type_name() const + const CExpression& partition_expression() const + + ctypedef vector[shared_ptr[CFragment]] CFragmentVector \ + "arrow::dataset::FragmentVector" + + ctypedef CIterator[shared_ptr[CFragment]] CFragmentIterator \ + "arrow::dataset::FragmentIterator" + + cdef cppclass CInMemoryFragment "arrow::dataset::InMemoryFragment"( + CFragment): + CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches, + CExpression partition_expression) + + cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch": + shared_ptr[CRecordBatch] record_batch + shared_ptr[CFragment] fragment + + ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \ + "arrow::dataset::TaggedRecordBatchIterator" + + cdef cppclass CScanner "arrow::dataset::Scanner": + CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions]) + CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions]) + CResult[CScanTaskIterator] Scan() + CResult[CTaggedRecordBatchIterator] ScanBatches() + CResult[shared_ptr[CTable]] ToTable() + CResult[shared_ptr[CTable]] TakeRows(const CArray& indices) + CResult[shared_ptr[CTable]] Head(int64_t num_rows) + CResult[int64_t] CountRows() + CResult[CFragmentIterator] GetFragments() + CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader() + const shared_ptr[CScanOptions]& options() + + cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder": + CScannerBuilder(shared_ptr[CDataset], + shared_ptr[CScanOptions] scan_options) + CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment], + shared_ptr[CScanOptions] scan_options) + + @staticmethod + shared_ptr[CScannerBuilder] FromRecordBatchReader( + shared_ptr[CRecordBatchReader] reader) + CStatus ProjectColumns "Project"(const vector[c_string]& columns) + CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns) + CStatus Filter(CExpression filter) + CStatus UseThreads(c_bool use_threads) + CStatus UseAsync(c_bool use_async) + CStatus Pool(CMemoryPool* pool) + CStatus BatchSize(int64_t batch_size) + CStatus FragmentScanOptions( + shared_ptr[CFragmentScanOptions] fragment_scan_options) + CResult[shared_ptr[CScanner]] Finish() + shared_ptr[CSchema] schema() const + + ctypedef vector[shared_ptr[CDataset]] CDatasetVector \ + "arrow::dataset::DatasetVector" + + cdef cppclass CDataset "arrow::dataset::Dataset": + const shared_ptr[CSchema] & schema() + CResult[CFragmentIterator] GetFragments() + CResult[CFragmentIterator] GetFragments(CExpression predicate) + const CExpression & partition_expression() + c_string type_name() + + CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema]) + + CResult[shared_ptr[CScannerBuilder]] NewScan() + + cdef cppclass CInMemoryDataset "arrow::dataset::InMemoryDataset"( + CDataset): + CInMemoryDataset(shared_ptr[CRecordBatchReader]) + CInMemoryDataset(shared_ptr[CTable]) + + cdef cppclass CUnionDataset "arrow::dataset::UnionDataset"( + CDataset): + @staticmethod + CResult[shared_ptr[CUnionDataset]] Make(shared_ptr[CSchema] schema, + CDatasetVector children) + + const CDatasetVector& children() const + + cdef cppclass CInspectOptions "arrow::dataset::InspectOptions": + int fragments + + cdef cppclass CFinishOptions "arrow::dataset::FinishOptions": + shared_ptr[CSchema] schema + CInspectOptions inspect_options + c_bool validate_fragments + + cdef cppclass CDatasetFactory "arrow::dataset::DatasetFactory": + CResult[vector[shared_ptr[CSchema]]] InspectSchemas(CInspectOptions) + CResult[shared_ptr[CSchema]] Inspect(CInspectOptions) + CResult[shared_ptr[CDataset]] FinishWithSchema "Finish"( + const shared_ptr[CSchema]& schema) + CResult[shared_ptr[CDataset]] Finish() + const CExpression& root_partition() + CStatus SetRootPartition(CExpression partition) + + cdef cppclass CUnionDatasetFactory "arrow::dataset::UnionDatasetFactory": + @staticmethod + CResult[shared_ptr[CDatasetFactory]] Make( + vector[shared_ptr[CDatasetFactory]] factories) + + cdef cppclass CFileSource "arrow::dataset::FileSource": + const c_string& path() const + const shared_ptr[CFileSystem]& filesystem() const + const shared_ptr[CBuffer]& buffer() const + # HACK: Cython can't handle all the overloads so don't declare them. + # This means invalid construction of CFileSource won't be caught in + # the C++ generation phase (though it will still be caught when + # the generated C++ is compiled). + CFileSource(...) + + cdef cppclass CFileWriteOptions \ + "arrow::dataset::FileWriteOptions": + const shared_ptr[CFileFormat]& format() const + c_string type_name() const + + cdef cppclass CFileWriter \ + "arrow::dataset::FileWriter": + const shared_ptr[CFileFormat]& format() const + const shared_ptr[CSchema]& schema() const + const shared_ptr[CFileWriteOptions]& options() const + const CFileLocator& destination() const + + cdef cppclass CParquetFileWriter \ + "arrow::dataset::ParquetFileWriter"(CFileWriter): + const shared_ptr[FileWriter]& parquet_writer() const + + cdef cppclass CFileFormat "arrow::dataset::FileFormat": + shared_ptr[CFragmentScanOptions] default_fragment_scan_options + c_string type_name() const + CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const + CResult[shared_ptr[CFileFragment]] MakeFragment( + CFileSource source, + CExpression partition_expression, + shared_ptr[CSchema] physical_schema) + shared_ptr[CFileWriteOptions] DefaultWriteOptions() + + cdef cppclass CFileFragment "arrow::dataset::FileFragment"( + CFragment): + const CFileSource& source() const + const shared_ptr[CFileFormat]& format() const + + cdef cppclass CParquetFileWriteOptions \ + "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions): + shared_ptr[WriterProperties] writer_properties + shared_ptr[ArrowWriterProperties] arrow_writer_properties + + cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"( + CFileFragment): + const vector[int]& row_groups() const + shared_ptr[CFileMetaData] metadata() const + CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup( + CExpression predicate) + CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"( + CExpression predicate) + CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"( + vector[int] row_group_ids) + CStatus EnsureCompleteMetadata() + + cdef cppclass CFileSystemDatasetWriteOptions \ + "arrow::dataset::FileSystemDatasetWriteOptions": + shared_ptr[CFileWriteOptions] file_write_options + shared_ptr[CFileSystem] filesystem + c_string base_dir + shared_ptr[CPartitioning] partitioning + int max_partitions + c_string basename_template + function[cb_writer_finish_internal] writer_pre_finish + function[cb_writer_finish_internal] writer_post_finish + ExistingDataBehavior existing_data_behavior + + cdef cppclass CFileSystemDataset \ + "arrow::dataset::FileSystemDataset"(CDataset): + @staticmethod + CResult[shared_ptr[CDataset]] Make( + shared_ptr[CSchema] schema, + CExpression source_partition, + shared_ptr[CFileFormat] format, + shared_ptr[CFileSystem] filesystem, + vector[shared_ptr[CFileFragment]] fragments) + + @staticmethod + CStatus Write( + const CFileSystemDatasetWriteOptions& write_options, + shared_ptr[CScanner] scanner) + + c_string type() + vector[c_string] files() + const shared_ptr[CFileFormat]& format() const + const shared_ptr[CFileSystem]& filesystem() const + const shared_ptr[CPartitioning]& partitioning() const + + cdef cppclass CParquetFileFormatReaderOptions \ + "arrow::dataset::ParquetFileFormat::ReaderOptions": + unordered_set[c_string] dict_columns + TimeUnit coerce_int96_timestamp_unit + + cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"( + CFileFormat): + CParquetFileFormatReaderOptions reader_options + CResult[shared_ptr[CFileFragment]] MakeFragment( + CFileSource source, + CExpression partition_expression, + shared_ptr[CSchema] physical_schema, + vector[int] row_groups) + + cdef cppclass CParquetFragmentScanOptions \ + "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions): + shared_ptr[CReaderProperties] reader_properties + shared_ptr[ArrowReaderProperties] arrow_reader_properties + c_bool enable_parallel_column_conversion + + cdef cppclass CIpcFileWriteOptions \ + "arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions): + pass + + cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"( + CFileFormat): + pass + + cdef cppclass COrcFileFormat "arrow::dataset::OrcFileFormat"( + CFileFormat): + pass + + cdef cppclass CCsvFileWriteOptions \ + "arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions): + shared_ptr[CCSVWriteOptions] write_options + CMemoryPool* pool + + cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"( + CFileFormat): + CCSVParseOptions parse_options + + cdef cppclass CCsvFragmentScanOptions \ + "arrow::dataset::CsvFragmentScanOptions"(CFragmentScanOptions): + CCSVConvertOptions convert_options + CCSVReadOptions read_options + + cdef cppclass CPartitioning "arrow::dataset::Partitioning": + c_string type_name() const + CResult[CExpression] Parse(const c_string & path) const + const shared_ptr[CSchema] & schema() + + cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding": + pass + + CSegmentEncoding CSegmentEncodingNone\ + " arrow::dataset::SegmentEncoding::None" + CSegmentEncoding CSegmentEncodingUri\ + " arrow::dataset::SegmentEncoding::Uri" + + cdef cppclass CKeyValuePartitioningOptions \ + "arrow::dataset::KeyValuePartitioningOptions": + CSegmentEncoding segment_encoding + + cdef cppclass CHivePartitioningOptions \ + "arrow::dataset::HivePartitioningOptions": + CSegmentEncoding segment_encoding + c_string null_fallback + + cdef cppclass CPartitioningFactoryOptions \ + "arrow::dataset::PartitioningFactoryOptions": + c_bool infer_dictionary + shared_ptr[CSchema] schema + CSegmentEncoding segment_encoding + + cdef cppclass CHivePartitioningFactoryOptions \ + "arrow::dataset::HivePartitioningFactoryOptions": + c_bool infer_dictionary + c_string null_fallback + shared_ptr[CSchema] schema + CSegmentEncoding segment_encoding + + cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory": + c_string type_name() const + + cdef cppclass CDirectoryPartitioning \ + "arrow::dataset::DirectoryPartitioning"(CPartitioning): + CDirectoryPartitioning(shared_ptr[CSchema] schema, + vector[shared_ptr[CArray]] dictionaries) + + @staticmethod + shared_ptr[CPartitioningFactory] MakeFactory( + vector[c_string] field_names, CPartitioningFactoryOptions) + + vector[shared_ptr[CArray]] dictionaries() const + + cdef cppclass CHivePartitioning \ + "arrow::dataset::HivePartitioning"(CPartitioning): + CHivePartitioning(shared_ptr[CSchema] schema, + vector[shared_ptr[CArray]] dictionaries, + CHivePartitioningOptions options) + + @staticmethod + shared_ptr[CPartitioningFactory] MakeFactory( + CHivePartitioningFactoryOptions) + + vector[shared_ptr[CArray]] dictionaries() const + + cdef cppclass CPartitioningOrFactory \ + "arrow::dataset::PartitioningOrFactory": + CPartitioningOrFactory(shared_ptr[CPartitioning]) + CPartitioningOrFactory(shared_ptr[CPartitioningFactory]) + CPartitioningOrFactory & operator = (shared_ptr[CPartitioning]) + CPartitioningOrFactory & operator = ( + shared_ptr[CPartitioningFactory]) + shared_ptr[CPartitioning] partitioning() const + shared_ptr[CPartitioningFactory] factory() const + + cdef cppclass CFileSystemFactoryOptions \ + "arrow::dataset::FileSystemFactoryOptions": + CPartitioningOrFactory partitioning + c_string partition_base_dir + c_bool exclude_invalid_files + vector[c_string] selector_ignore_prefixes + + cdef cppclass CFileSystemDatasetFactory \ + "arrow::dataset::FileSystemDatasetFactory"( + CDatasetFactory): + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromPaths "Make"( + shared_ptr[CFileSystem] filesystem, + vector[c_string] paths, + shared_ptr[CFileFormat] format, + CFileSystemFactoryOptions options + ) + + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"( + shared_ptr[CFileSystem] filesystem, + CFileSelector, + shared_ptr[CFileFormat] format, + CFileSystemFactoryOptions options + ) + + cdef cppclass CParquetFactoryOptions \ + "arrow::dataset::ParquetFactoryOptions": + CPartitioningOrFactory partitioning + c_string partition_base_dir + c_bool validate_column_chunk_paths + + cdef cppclass CParquetDatasetFactory \ + "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory): + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"( + const c_string& metadata_path, + shared_ptr[CFileSystem] filesystem, + shared_ptr[CParquetFileFormat] format, + CParquetFactoryOptions options + ) + + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"( + const CFileSource& metadata_path, + const c_string& base_path, + shared_ptr[CFileSystem] filesystem, + shared_ptr[CParquetFileFormat] format, + CParquetFactoryOptions options + ) diff --git a/src/arrow/python/pyarrow/includes/libarrow_feather.pxd b/src/arrow/python/pyarrow/includes/libarrow_feather.pxd new file mode 100644 index 000000000..ddfc8b2e5 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow_feather.pxd @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.libarrow cimport (CCompressionType, CStatus, CTable, + COutputStream, CResult, shared_ptr, + vector, CRandomAccessFile, CSchema, + c_string) + + +cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil: + int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version" + int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version" + + cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties": + int version + int chunksize + CCompressionType compression + int compression_level + + CStatus WriteFeather" arrow::ipc::feather::WriteTable" \ + (const CTable& table, COutputStream* out, + CFeatherProperties properties) + + cdef cppclass CFeatherReader" arrow::ipc::feather::Reader": + @staticmethod + CResult[shared_ptr[CFeatherReader]] Open( + const shared_ptr[CRandomAccessFile]& file) + int version() + shared_ptr[CSchema] schema() + + CStatus Read(shared_ptr[CTable]* out) + CStatus Read(const vector[int] indices, shared_ptr[CTable]* out) + CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out) diff --git a/src/arrow/python/pyarrow/includes/libarrow_flight.pxd b/src/arrow/python/pyarrow/includes/libarrow_flight.pxd new file mode 100644 index 000000000..2ac737aba --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow_flight.pxd @@ -0,0 +1,560 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * + + +cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: + cdef char* CPyServerMiddlewareName\ + " arrow::py::flight::kPyServerMiddlewareName" + + cdef cppclass CActionType" arrow::flight::ActionType": + c_string type + c_string description + + cdef cppclass CAction" arrow::flight::Action": + c_string type + shared_ptr[CBuffer] body + + cdef cppclass CFlightResult" arrow::flight::Result": + CFlightResult() + CFlightResult(CFlightResult) + shared_ptr[CBuffer] body + + cdef cppclass CBasicAuth" arrow::flight::BasicAuth": + CBasicAuth() + CBasicAuth(CBuffer) + CBasicAuth(CBasicAuth) + c_string username + c_string password + + cdef cppclass CResultStream" arrow::flight::ResultStream": + CStatus Next(unique_ptr[CFlightResult]* result) + + cdef cppclass CDescriptorType \ + " arrow::flight::FlightDescriptor::DescriptorType": + bint operator==(CDescriptorType) + + CDescriptorType CDescriptorTypeUnknown\ + " arrow::flight::FlightDescriptor::UNKNOWN" + CDescriptorType CDescriptorTypePath\ + " arrow::flight::FlightDescriptor::PATH" + CDescriptorType CDescriptorTypeCmd\ + " arrow::flight::FlightDescriptor::CMD" + + cdef cppclass CFlightDescriptor" arrow::flight::FlightDescriptor": + CDescriptorType type + c_string cmd + vector[c_string] path + CStatus SerializeToString(c_string* out) + + @staticmethod + CStatus Deserialize(const c_string& serialized, + CFlightDescriptor* out) + bint operator==(CFlightDescriptor) + + cdef cppclass CTicket" arrow::flight::Ticket": + CTicket() + c_string ticket + bint operator==(CTicket) + CStatus SerializeToString(c_string* out) + + @staticmethod + CStatus Deserialize(const c_string& serialized, CTicket* out) + + cdef cppclass CCriteria" arrow::flight::Criteria": + CCriteria() + c_string expression + + cdef cppclass CLocation" arrow::flight::Location": + CLocation() + c_string ToString() + c_bool Equals(const CLocation& other) + + @staticmethod + CStatus Parse(c_string& uri_string, CLocation* location) + + @staticmethod + CStatus ForGrpcTcp(c_string& host, int port, CLocation* location) + + @staticmethod + CStatus ForGrpcTls(c_string& host, int port, CLocation* location) + + @staticmethod + CStatus ForGrpcUnix(c_string& path, CLocation* location) + + cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint": + CFlightEndpoint() + + CTicket ticket + vector[CLocation] locations + + bint operator==(CFlightEndpoint) + + cdef cppclass CFlightInfo" arrow::flight::FlightInfo": + CFlightInfo(CFlightInfo info) + int64_t total_records() + int64_t total_bytes() + CStatus GetSchema(CDictionaryMemo* memo, shared_ptr[CSchema]* out) + CFlightDescriptor& descriptor() + const vector[CFlightEndpoint]& endpoints() + CStatus SerializeToString(c_string* out) + + @staticmethod + CStatus Deserialize(const c_string& serialized, + unique_ptr[CFlightInfo]* out) + + cdef cppclass CSchemaResult" arrow::flight::SchemaResult": + CSchemaResult(CSchemaResult result) + CStatus GetSchema(CDictionaryMemo* memo, shared_ptr[CSchema]* out) + + cdef cppclass CFlightListing" arrow::flight::FlightListing": + CStatus Next(unique_ptr[CFlightInfo]* info) + + cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing": + CSimpleFlightListing(vector[CFlightInfo]&& info) + + cdef cppclass CFlightPayload" arrow::flight::FlightPayload": + shared_ptr[CBuffer] descriptor + shared_ptr[CBuffer] app_metadata + CIpcPayload ipc_message + + cdef cppclass CFlightDataStream" arrow::flight::FlightDataStream": + shared_ptr[CSchema] schema() + CStatus Next(CFlightPayload*) + + cdef cppclass CFlightStreamChunk" arrow::flight::FlightStreamChunk": + CFlightStreamChunk() + shared_ptr[CRecordBatch] data + shared_ptr[CBuffer] app_metadata + + cdef cppclass CMetadataRecordBatchReader \ + " arrow::flight::MetadataRecordBatchReader": + CResult[shared_ptr[CSchema]] GetSchema() + CStatus Next(CFlightStreamChunk* out) + CStatus ReadAll(shared_ptr[CTable]* table) + + CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\ + " arrow::flight::MakeRecordBatchReader"( + shared_ptr[CMetadataRecordBatchReader]) + + cdef cppclass CMetadataRecordBatchWriter \ + " arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter): + CStatus Begin(shared_ptr[CSchema] schema, + const CIpcWriteOptions& options) + CStatus WriteMetadata(shared_ptr[CBuffer] app_metadata) + CStatus WriteWithMetadata(const CRecordBatch& batch, + shared_ptr[CBuffer] app_metadata) + + cdef cppclass CFlightStreamReader \ + " arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader): + void Cancel() + CStatus ReadAllWithStopToken" ReadAll"\ + (shared_ptr[CTable]* table, const CStopToken& stop_token) + + cdef cppclass CFlightMessageReader \ + " arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader): + CFlightDescriptor& descriptor() + + cdef cppclass CFlightMessageWriter \ + " arrow::flight::FlightMessageWriter"(CMetadataRecordBatchWriter): + pass + + cdef cppclass CFlightStreamWriter \ + " arrow::flight::FlightStreamWriter"(CMetadataRecordBatchWriter): + CStatus DoneWriting() + + cdef cppclass CRecordBatchStream \ + " arrow::flight::RecordBatchStream"(CFlightDataStream): + CRecordBatchStream(shared_ptr[CRecordBatchReader]& reader, + const CIpcWriteOptions& options) + + cdef cppclass CFlightMetadataReader" arrow::flight::FlightMetadataReader": + CStatus ReadMetadata(shared_ptr[CBuffer]* out) + + cdef cppclass CFlightMetadataWriter" arrow::flight::FlightMetadataWriter": + CStatus WriteMetadata(const CBuffer& message) + + cdef cppclass CServerAuthReader" arrow::flight::ServerAuthReader": + CStatus Read(c_string* token) + + cdef cppclass CServerAuthSender" arrow::flight::ServerAuthSender": + CStatus Write(c_string& token) + + cdef cppclass CClientAuthReader" arrow::flight::ClientAuthReader": + CStatus Read(c_string* token) + + cdef cppclass CClientAuthSender" arrow::flight::ClientAuthSender": + CStatus Write(c_string& token) + + cdef cppclass CServerAuthHandler" arrow::flight::ServerAuthHandler": + pass + + cdef cppclass CClientAuthHandler" arrow::flight::ClientAuthHandler": + pass + + cdef cppclass CServerCallContext" arrow::flight::ServerCallContext": + c_string& peer_identity() + c_string& peer() + c_bool is_cancelled() + CServerMiddleware* GetMiddleware(const c_string& key) + + cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration": + CTimeoutDuration(double) + + cdef cppclass CFlightCallOptions" arrow::flight::FlightCallOptions": + CFlightCallOptions() + CTimeoutDuration timeout + CIpcWriteOptions write_options + vector[pair[c_string, c_string]] headers + CStopToken stop_token + + cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair": + CCertKeyPair() + c_string pem_cert + c_string pem_key + + cdef cppclass CFlightMethod" arrow::flight::FlightMethod": + bint operator==(CFlightMethod) + + CFlightMethod CFlightMethodInvalid\ + " arrow::flight::FlightMethod::Invalid" + CFlightMethod CFlightMethodHandshake\ + " arrow::flight::FlightMethod::Handshake" + CFlightMethod CFlightMethodListFlights\ + " arrow::flight::FlightMethod::ListFlights" + CFlightMethod CFlightMethodGetFlightInfo\ + " arrow::flight::FlightMethod::GetFlightInfo" + CFlightMethod CFlightMethodGetSchema\ + " arrow::flight::FlightMethod::GetSchema" + CFlightMethod CFlightMethodDoGet\ + " arrow::flight::FlightMethod::DoGet" + CFlightMethod CFlightMethodDoPut\ + " arrow::flight::FlightMethod::DoPut" + CFlightMethod CFlightMethodDoAction\ + " arrow::flight::FlightMethod::DoAction" + CFlightMethod CFlightMethodListActions\ + " arrow::flight::FlightMethod::ListActions" + CFlightMethod CFlightMethodDoExchange\ + " arrow::flight::FlightMethod::DoExchange" + + cdef cppclass CCallInfo" arrow::flight::CallInfo": + CFlightMethod method + + # This is really std::unordered_multimap, but Cython has no + # bindings for it, so treat it as an opaque class and bind the + # methods we need + cdef cppclass CCallHeaders" arrow::flight::CallHeaders": + cppclass const_iterator: + pair[c_string, c_string] operator*() + const_iterator operator++() + bint operator==(const_iterator) + bint operator!=(const_iterator) + const_iterator cbegin() + const_iterator cend() + + cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders": + void AddHeader(const c_string& key, const c_string& value) + + cdef cppclass CServerMiddleware" arrow::flight::ServerMiddleware": + c_string name() + + cdef cppclass CServerMiddlewareFactory\ + " arrow::flight::ServerMiddlewareFactory": + pass + + cdef cppclass CClientMiddleware" arrow::flight::ClientMiddleware": + pass + + cdef cppclass CClientMiddlewareFactory\ + " arrow::flight::ClientMiddlewareFactory": + pass + + cdef cppclass CFlightServerOptions" arrow::flight::FlightServerOptions": + CFlightServerOptions(const CLocation& location) + CLocation location + unique_ptr[CServerAuthHandler] auth_handler + vector[CCertKeyPair] tls_certificates + c_bool verify_client + c_string root_certificates + vector[pair[c_string, shared_ptr[CServerMiddlewareFactory]]] middleware + + cdef cppclass CFlightClientOptions" arrow::flight::FlightClientOptions": + c_string tls_root_certs + c_string cert_chain + c_string private_key + c_string override_hostname + vector[shared_ptr[CClientMiddlewareFactory]] middleware + int64_t write_size_limit_bytes + vector[pair[c_string, CIntStringVariant]] generic_options + c_bool disable_server_verification + + @staticmethod + CFlightClientOptions Defaults() + + cdef cppclass CFlightClient" arrow::flight::FlightClient": + @staticmethod + CStatus Connect(const CLocation& location, + const CFlightClientOptions& options, + unique_ptr[CFlightClient]* client) + + CStatus Authenticate(CFlightCallOptions& options, + unique_ptr[CClientAuthHandler] auth_handler) + + CResult[pair[c_string, c_string]] AuthenticateBasicToken( + CFlightCallOptions& options, + const c_string& username, + const c_string& password) + + CStatus DoAction(CFlightCallOptions& options, CAction& action, + unique_ptr[CResultStream]* results) + CStatus ListActions(CFlightCallOptions& options, + vector[CActionType]* actions) + + CStatus ListFlights(CFlightCallOptions& options, CCriteria criteria, + unique_ptr[CFlightListing]* listing) + CStatus GetFlightInfo(CFlightCallOptions& options, + CFlightDescriptor& descriptor, + unique_ptr[CFlightInfo]* info) + CStatus GetSchema(CFlightCallOptions& options, + CFlightDescriptor& descriptor, + unique_ptr[CSchemaResult]* result) + CStatus DoGet(CFlightCallOptions& options, CTicket& ticket, + unique_ptr[CFlightStreamReader]* stream) + CStatus DoPut(CFlightCallOptions& options, + CFlightDescriptor& descriptor, + shared_ptr[CSchema]& schema, + unique_ptr[CFlightStreamWriter]* stream, + unique_ptr[CFlightMetadataReader]* reader) + CStatus DoExchange(CFlightCallOptions& options, + CFlightDescriptor& descriptor, + unique_ptr[CFlightStreamWriter]* writer, + unique_ptr[CFlightStreamReader]* reader) + + cdef cppclass CFlightStatusCode" arrow::flight::FlightStatusCode": + bint operator==(CFlightStatusCode) + + CFlightStatusCode CFlightStatusInternal \ + " arrow::flight::FlightStatusCode::Internal" + CFlightStatusCode CFlightStatusTimedOut \ + " arrow::flight::FlightStatusCode::TimedOut" + CFlightStatusCode CFlightStatusCancelled \ + " arrow::flight::FlightStatusCode::Cancelled" + CFlightStatusCode CFlightStatusUnauthenticated \ + " arrow::flight::FlightStatusCode::Unauthenticated" + CFlightStatusCode CFlightStatusUnauthorized \ + " arrow::flight::FlightStatusCode::Unauthorized" + CFlightStatusCode CFlightStatusUnavailable \ + " arrow::flight::FlightStatusCode::Unavailable" + CFlightStatusCode CFlightStatusFailed \ + " arrow::flight::FlightStatusCode::Failed" + + cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail": + CFlightStatusCode code() + c_string extra_info() + + @staticmethod + shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status) + + cdef cppclass FlightWriteSizeStatusDetail\ + " arrow::flight::FlightWriteSizeStatusDetail": + int64_t limit() + int64_t actual() + + @staticmethod + shared_ptr[FlightWriteSizeStatusDetail] UnwrapStatus( + const CStatus& status) + + cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \ + (CFlightStatusCode code, const c_string& message) + + cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \ + (CFlightStatusCode code, + const c_string& message, + const c_string& extra_info) + +# Callbacks for implementing Flight servers +# Use typedef to emulate syntax for std::function<void(..)> +ctypedef CStatus cb_list_flights(object, const CServerCallContext&, + const CCriteria*, + unique_ptr[CFlightListing]*) +ctypedef CStatus cb_get_flight_info(object, const CServerCallContext&, + const CFlightDescriptor&, + unique_ptr[CFlightInfo]*) +ctypedef CStatus cb_get_schema(object, const CServerCallContext&, + const CFlightDescriptor&, + unique_ptr[CSchemaResult]*) +ctypedef CStatus cb_do_put(object, const CServerCallContext&, + unique_ptr[CFlightMessageReader], + unique_ptr[CFlightMetadataWriter]) +ctypedef CStatus cb_do_get(object, const CServerCallContext&, + const CTicket&, + unique_ptr[CFlightDataStream]*) +ctypedef CStatus cb_do_exchange(object, const CServerCallContext&, + unique_ptr[CFlightMessageReader], + unique_ptr[CFlightMessageWriter]) +ctypedef CStatus cb_do_action(object, const CServerCallContext&, + const CAction&, + unique_ptr[CResultStream]*) +ctypedef CStatus cb_list_actions(object, const CServerCallContext&, + vector[CActionType]*) +ctypedef CStatus cb_result_next(object, unique_ptr[CFlightResult]*) +ctypedef CStatus cb_data_stream_next(object, CFlightPayload*) +ctypedef CStatus cb_server_authenticate(object, CServerAuthSender*, + CServerAuthReader*) +ctypedef CStatus cb_is_valid(object, const c_string&, c_string*) +ctypedef CStatus cb_client_authenticate(object, CClientAuthSender*, + CClientAuthReader*) +ctypedef CStatus cb_get_token(object, c_string*) + +ctypedef CStatus cb_middleware_sending_headers(object, CAddCallHeaders*) +ctypedef CStatus cb_middleware_call_completed(object, const CStatus&) +ctypedef CStatus cb_client_middleware_received_headers( + object, const CCallHeaders&) +ctypedef CStatus cb_server_middleware_start_call( + object, + const CCallInfo&, + const CCallHeaders&, + shared_ptr[CServerMiddleware]*) +ctypedef CStatus cb_client_middleware_start_call( + object, + const CCallInfo&, + unique_ptr[CClientMiddleware]*) + +cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil: + cdef cppclass PyFlightServerVtable: + PyFlightServerVtable() + function[cb_list_flights] list_flights + function[cb_get_flight_info] get_flight_info + function[cb_get_schema] get_schema + function[cb_do_put] do_put + function[cb_do_get] do_get + function[cb_do_exchange] do_exchange + function[cb_do_action] do_action + function[cb_list_actions] list_actions + + cdef cppclass PyServerAuthHandlerVtable: + PyServerAuthHandlerVtable() + function[cb_server_authenticate] authenticate + function[cb_is_valid] is_valid + + cdef cppclass PyClientAuthHandlerVtable: + PyClientAuthHandlerVtable() + function[cb_client_authenticate] authenticate + function[cb_get_token] get_token + + cdef cppclass PyFlightServer: + PyFlightServer(object server, PyFlightServerVtable vtable) + + CStatus Init(CFlightServerOptions& options) + int port() + CStatus ServeWithSignals() except * + CStatus Shutdown() + CStatus Wait() + + cdef cppclass PyServerAuthHandler\ + " arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler): + PyServerAuthHandler(object handler, PyServerAuthHandlerVtable vtable) + + cdef cppclass PyClientAuthHandler\ + " arrow::py::flight::PyClientAuthHandler"(CClientAuthHandler): + PyClientAuthHandler(object handler, PyClientAuthHandlerVtable vtable) + + cdef cppclass CPyFlightResultStream\ + " arrow::py::flight::PyFlightResultStream"(CResultStream): + CPyFlightResultStream(object generator, + function[cb_result_next] callback) + + cdef cppclass CPyFlightDataStream\ + " arrow::py::flight::PyFlightDataStream"(CFlightDataStream): + CPyFlightDataStream(object data_source, + unique_ptr[CFlightDataStream] stream) + + cdef cppclass CPyGeneratorFlightDataStream\ + " arrow::py::flight::PyGeneratorFlightDataStream"\ + (CFlightDataStream): + CPyGeneratorFlightDataStream(object generator, + shared_ptr[CSchema] schema, + function[cb_data_stream_next] callback, + const CIpcWriteOptions& options) + + cdef cppclass PyServerMiddlewareVtable\ + " arrow::py::flight::PyServerMiddleware::Vtable": + PyServerMiddlewareVtable() + function[cb_middleware_sending_headers] sending_headers + function[cb_middleware_call_completed] call_completed + + cdef cppclass PyClientMiddlewareVtable\ + " arrow::py::flight::PyClientMiddleware::Vtable": + PyClientMiddlewareVtable() + function[cb_middleware_sending_headers] sending_headers + function[cb_client_middleware_received_headers] received_headers + function[cb_middleware_call_completed] call_completed + + cdef cppclass CPyServerMiddleware\ + " arrow::py::flight::PyServerMiddleware"(CServerMiddleware): + CPyServerMiddleware(object middleware, PyServerMiddlewareVtable vtable) + void* py_object() + + cdef cppclass CPyServerMiddlewareFactory\ + " arrow::py::flight::PyServerMiddlewareFactory"\ + (CServerMiddlewareFactory): + CPyServerMiddlewareFactory( + object factory, + function[cb_server_middleware_start_call] start_call) + + cdef cppclass CPyClientMiddleware\ + " arrow::py::flight::PyClientMiddleware"(CClientMiddleware): + CPyClientMiddleware(object middleware, PyClientMiddlewareVtable vtable) + + cdef cppclass CPyClientMiddlewareFactory\ + " arrow::py::flight::PyClientMiddlewareFactory"\ + (CClientMiddlewareFactory): + CPyClientMiddlewareFactory( + object factory, + function[cb_client_middleware_start_call] start_call) + + cdef CStatus CreateFlightInfo" arrow::py::flight::CreateFlightInfo"( + shared_ptr[CSchema] schema, + CFlightDescriptor& descriptor, + vector[CFlightEndpoint] endpoints, + int64_t total_records, + int64_t total_bytes, + unique_ptr[CFlightInfo]* out) + + cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"( + shared_ptr[CSchema] schema, + unique_ptr[CSchemaResult]* out) + + cdef CStatus DeserializeBasicAuth\ + " arrow::py::flight::DeserializeBasicAuth"( + c_string buf, + unique_ptr[CBasicAuth]* out) + + cdef CStatus SerializeBasicAuth" arrow::py::flight::SerializeBasicAuth"( + CBasicAuth basic_auth, + c_string* out) + + +cdef extern from "arrow/util/variant.h" namespace "arrow" nogil: + cdef cppclass CIntStringVariant" arrow::util::Variant<int, std::string>": + CIntStringVariant() + CIntStringVariant(int) + CIntStringVariant(c_string) diff --git a/src/arrow/python/pyarrow/includes/libarrow_fs.pxd b/src/arrow/python/pyarrow/includes/libarrow_fs.pxd new file mode 100644 index 000000000..9dca5fbf6 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libarrow_fs.pxd @@ -0,0 +1,296 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * + +cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: + + ctypedef enum CFileType "arrow::fs::FileType": + CFileType_NotFound "arrow::fs::FileType::NotFound" + CFileType_Unknown "arrow::fs::FileType::Unknown" + CFileType_File "arrow::fs::FileType::File" + CFileType_Directory "arrow::fs::FileType::Directory" + + cdef cppclass CFileInfo "arrow::fs::FileInfo": + CFileInfo() + CFileInfo(CFileInfo&&) + CFileInfo& operator=(CFileInfo&&) + CFileInfo(const CFileInfo&) + CFileInfo& operator=(const CFileInfo&) + + CFileType type() + void set_type(CFileType type) + c_string path() + void set_path(const c_string& path) + c_string base_name() + int64_t size() + void set_size(int64_t size) + c_string extension() + CTimePoint mtime() + void set_mtime(CTimePoint mtime) + + cdef cppclass CFileSelector "arrow::fs::FileSelector": + CFileSelector() + c_string base_dir + c_bool allow_not_found + c_bool recursive + + cdef cppclass CFileLocator "arrow::fs::FileLocator": + shared_ptr[CFileSystem] filesystem + c_string path + + cdef cppclass CFileSystem "arrow::fs::FileSystem": + shared_ptr[CFileSystem] shared_from_this() + c_string type_name() const + CResult[c_string] NormalizePath(c_string path) + CResult[CFileInfo] GetFileInfo(const c_string& path) + CResult[vector[CFileInfo]] GetFileInfo( + const vector[c_string]& paths) + CResult[vector[CFileInfo]] GetFileInfo(const CFileSelector& select) + CStatus CreateDir(const c_string& path, c_bool recursive) + CStatus DeleteDir(const c_string& path) + CStatus DeleteDirContents(const c_string& path) + CStatus DeleteRootDirContents() + CStatus DeleteFile(const c_string& path) + CStatus DeleteFiles(const vector[c_string]& paths) + CStatus Move(const c_string& src, const c_string& dest) + CStatus CopyFile(const c_string& src, const c_string& dest) + CResult[shared_ptr[CInputStream]] OpenInputStream( + const c_string& path) + CResult[shared_ptr[CRandomAccessFile]] OpenInputFile( + const c_string& path) + CResult[shared_ptr[COutputStream]] OpenOutputStream( + const c_string& path, const shared_ptr[const CKeyValueMetadata]&) + CResult[shared_ptr[COutputStream]] OpenAppendStream( + const c_string& path, const shared_ptr[const CKeyValueMetadata]&) + c_bool Equals(const CFileSystem& other) + c_bool Equals(shared_ptr[CFileSystem] other) + + CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \ + "arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path) + CResult[shared_ptr[CFileSystem]] CFileSystemFromUriOrPath \ + "arrow::fs::FileSystemFromUriOrPath"(const c_string& uri, + c_string* out_path) + + cdef cppclass CFileSystemGlobalOptions \ + "arrow::fs::FileSystemGlobalOptions": + c_string tls_ca_file_path + c_string tls_ca_dir_path + + CStatus CFileSystemsInitialize "arrow::fs::Initialize" \ + (const CFileSystemGlobalOptions& options) + + cdef cppclass CLocalFileSystemOptions "arrow::fs::LocalFileSystemOptions": + c_bool use_mmap + + @staticmethod + CLocalFileSystemOptions Defaults() + + c_bool Equals(const CLocalFileSystemOptions& other) + + cdef cppclass CLocalFileSystem "arrow::fs::LocalFileSystem"(CFileSystem): + CLocalFileSystem() + CLocalFileSystem(CLocalFileSystemOptions) + CLocalFileSystemOptions options() + + cdef cppclass CSubTreeFileSystem \ + "arrow::fs::SubTreeFileSystem"(CFileSystem): + CSubTreeFileSystem(const c_string& base_path, + shared_ptr[CFileSystem] base_fs) + c_string base_path() + shared_ptr[CFileSystem] base_fs() + + ctypedef enum CS3LogLevel "arrow::fs::S3LogLevel": + CS3LogLevel_Off "arrow::fs::S3LogLevel::Off" + CS3LogLevel_Fatal "arrow::fs::S3LogLevel::Fatal" + CS3LogLevel_Error "arrow::fs::S3LogLevel::Error" + CS3LogLevel_Warn "arrow::fs::S3LogLevel::Warn" + CS3LogLevel_Info "arrow::fs::S3LogLevel::Info" + CS3LogLevel_Debug "arrow::fs::S3LogLevel::Debug" + CS3LogLevel_Trace "arrow::fs::S3LogLevel::Trace" + + cdef struct CS3GlobalOptions "arrow::fs::S3GlobalOptions": + CS3LogLevel log_level + + cdef cppclass CS3ProxyOptions "arrow::fs::S3ProxyOptions": + c_string scheme + c_string host + int port + c_string username + c_string password + c_bool Equals(const CS3ProxyOptions& other) + + @staticmethod + CResult[CS3ProxyOptions] FromUriString "FromUri"( + const c_string& uri_string) + + ctypedef enum CS3CredentialsKind "arrow::fs::S3CredentialsKind": + CS3CredentialsKind_Anonymous "arrow::fs::S3CredentialsKind::Anonymous" + CS3CredentialsKind_Default "arrow::fs::S3CredentialsKind::Default" + CS3CredentialsKind_Explicit "arrow::fs::S3CredentialsKind::Explicit" + CS3CredentialsKind_Role "arrow::fs::S3CredentialsKind::Role" + CS3CredentialsKind_WebIdentity \ + "arrow::fs::S3CredentialsKind::WebIdentity" + + cdef cppclass CS3Options "arrow::fs::S3Options": + c_string region + c_string endpoint_override + c_string scheme + c_bool background_writes + shared_ptr[const CKeyValueMetadata] default_metadata + c_string role_arn + c_string session_name + c_string external_id + int load_frequency + CS3ProxyOptions proxy_options + CS3CredentialsKind credentials_kind + void ConfigureDefaultCredentials() + void ConfigureAccessKey(const c_string& access_key, + const c_string& secret_key, + const c_string& session_token) + c_string GetAccessKey() + c_string GetSecretKey() + c_string GetSessionToken() + c_bool Equals(const CS3Options& other) + + @staticmethod + CS3Options Defaults() + + @staticmethod + CS3Options Anonymous() + + @staticmethod + CS3Options FromAccessKey(const c_string& access_key, + const c_string& secret_key, + const c_string& session_token) + + @staticmethod + CS3Options FromAssumeRole(const c_string& role_arn, + const c_string& session_name, + const c_string& external_id, + const int load_frequency) + + cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem): + @staticmethod + CResult[shared_ptr[CS3FileSystem]] Make(const CS3Options& options) + CS3Options options() + c_string region() + + cdef CStatus CInitializeS3 "arrow::fs::InitializeS3"( + const CS3GlobalOptions& options) + cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"() + + cdef cppclass CHdfsOptions "arrow::fs::HdfsOptions": + HdfsConnectionConfig connection_config + int32_t buffer_size + int16_t replication + int64_t default_block_size + + @staticmethod + CResult[CHdfsOptions] FromUriString "FromUri"( + const c_string& uri_string) + void ConfigureEndPoint(c_string host, int port) + void ConfigureDriver(c_bool use_hdfs3) + void ConfigureReplication(int16_t replication) + void ConfigureUser(c_string user_name) + void ConfigureBufferSize(int32_t buffer_size) + void ConfigureBlockSize(int64_t default_block_size) + void ConfigureKerberosTicketCachePath(c_string path) + void ConfigureExtraConf(c_string key, c_string value) + + cdef cppclass CHadoopFileSystem "arrow::fs::HadoopFileSystem"(CFileSystem): + @staticmethod + CResult[shared_ptr[CHadoopFileSystem]] Make( + const CHdfsOptions& options) + CHdfsOptions options() + + cdef cppclass CMockFileSystem "arrow::fs::internal::MockFileSystem"( + CFileSystem): + CMockFileSystem(CTimePoint current_time) + + CStatus CCopyFiles "arrow::fs::CopyFiles"( + const vector[CFileLocator]& sources, + const vector[CFileLocator]& destinations, + const CIOContext& io_context, + int64_t chunk_size, c_bool use_threads) + CStatus CCopyFilesWithSelector "arrow::fs::CopyFiles"( + const shared_ptr[CFileSystem]& source_fs, + const CFileSelector& source_sel, + const shared_ptr[CFileSystem]& destination_fs, + const c_string& destination_base_dir, + const CIOContext& io_context, + int64_t chunk_size, c_bool use_threads) + + +# Callbacks for implementing Python filesystems +# Use typedef to emulate syntax for std::function<void(..)> +ctypedef void CallbackGetTypeName(object, c_string*) +ctypedef c_bool CallbackEquals(object, const CFileSystem&) + +ctypedef void CallbackGetFileInfo(object, const c_string&, CFileInfo*) +ctypedef void CallbackGetFileInfoVector(object, const vector[c_string]&, + vector[CFileInfo]*) +ctypedef void CallbackGetFileInfoSelector(object, const CFileSelector&, + vector[CFileInfo]*) +ctypedef void CallbackCreateDir(object, const c_string&, c_bool) +ctypedef void CallbackDeleteDir(object, const c_string&) +ctypedef void CallbackDeleteDirContents(object, const c_string&) +ctypedef void CallbackDeleteRootDirContents(object) +ctypedef void CallbackDeleteFile(object, const c_string&) +ctypedef void CallbackMove(object, const c_string&, const c_string&) +ctypedef void CallbackCopyFile(object, const c_string&, const c_string&) + +ctypedef void CallbackOpenInputStream(object, const c_string&, + shared_ptr[CInputStream]*) +ctypedef void CallbackOpenInputFile(object, const c_string&, + shared_ptr[CRandomAccessFile]*) +ctypedef void CallbackOpenOutputStream( + object, const c_string&, const shared_ptr[const CKeyValueMetadata]&, + shared_ptr[COutputStream]*) +ctypedef void CallbackNormalizePath(object, const c_string&, c_string*) + +cdef extern from "arrow/python/filesystem.h" namespace "arrow::py::fs" nogil: + + cdef cppclass CPyFileSystemVtable "arrow::py::fs::PyFileSystemVtable": + PyFileSystemVtable() + function[CallbackGetTypeName] get_type_name + function[CallbackEquals] equals + function[CallbackGetFileInfo] get_file_info + function[CallbackGetFileInfoVector] get_file_info_vector + function[CallbackGetFileInfoSelector] get_file_info_selector + function[CallbackCreateDir] create_dir + function[CallbackDeleteDir] delete_dir + function[CallbackDeleteDirContents] delete_dir_contents + function[CallbackDeleteRootDirContents] delete_root_dir_contents + function[CallbackDeleteFile] delete_file + function[CallbackMove] move + function[CallbackCopyFile] copy_file + function[CallbackOpenInputStream] open_input_stream + function[CallbackOpenInputFile] open_input_file + function[CallbackOpenOutputStream] open_output_stream + function[CallbackOpenOutputStream] open_append_stream + function[CallbackNormalizePath] normalize_path + + cdef cppclass CPyFileSystem "arrow::py::fs::PyFileSystem": + @staticmethod + shared_ptr[CPyFileSystem] Make(object handler, + CPyFileSystemVtable vtable) + + PyObject* handler() diff --git a/src/arrow/python/pyarrow/includes/libgandiva.pxd b/src/arrow/python/pyarrow/includes/libgandiva.pxd new file mode 100644 index 000000000..c75977d37 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libgandiva.pxd @@ -0,0 +1,286 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from libcpp.string cimport string as c_string +from libcpp.unordered_set cimport unordered_set as c_unordered_set +from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * + +cdef extern from "gandiva/node.h" namespace "gandiva" nogil: + + cdef cppclass CNode" gandiva::Node": + c_string ToString() + shared_ptr[CDataType] return_type() + + cdef cppclass CExpression" gandiva::Expression": + c_string ToString() + shared_ptr[CNode] root() + shared_ptr[CField] result() + + ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector" + + ctypedef vector[shared_ptr[CExpression]] \ + CExpressionVector" gandiva::ExpressionVector" + +cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil: + + cdef cppclass CSelectionVector" gandiva::SelectionVector": + + shared_ptr[CArray] ToArray() + + enum CSelectionVector_Mode" gandiva::SelectionVector::Mode": + CSelectionVector_Mode_NONE" gandiva::SelectionVector::Mode::MODE_NONE" + CSelectionVector_Mode_UINT16" \ + gandiva::SelectionVector::Mode::MODE_UINT16" + CSelectionVector_Mode_UINT32" \ + gandiva::SelectionVector::Mode::MODE_UINT32" + CSelectionVector_Mode_UINT64" \ + gandiva::SelectionVector::Mode::MODE_UINT64" + + cdef CStatus SelectionVector_MakeInt16\ + "gandiva::SelectionVector::MakeInt16"( + int64_t max_slots, CMemoryPool* pool, + shared_ptr[CSelectionVector]* selection_vector) + + cdef CStatus SelectionVector_MakeInt32\ + "gandiva::SelectionVector::MakeInt32"( + int64_t max_slots, CMemoryPool* pool, + shared_ptr[CSelectionVector]* selection_vector) + + cdef CStatus SelectionVector_MakeInt64\ + "gandiva::SelectionVector::MakeInt64"( + int64_t max_slots, CMemoryPool* pool, + shared_ptr[CSelectionVector]* selection_vector) + +cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *: + uppercase = name.upper() + if uppercase == 'NONE': + return CSelectionVector_Mode_NONE + elif uppercase == 'UINT16': + return CSelectionVector_Mode_UINT16 + elif uppercase == 'UINT32': + return CSelectionVector_Mode_UINT32 + elif uppercase == 'UINT64': + return CSelectionVector_Mode_UINT64 + else: + raise ValueError('Invalid value for Selection Mode: {!r}'.format(name)) + +cdef inline str _selection_mode_name(CSelectionVector_Mode ctype): + if ctype == CSelectionVector_Mode_NONE: + return 'NONE' + elif ctype == CSelectionVector_Mode_UINT16: + return 'UINT16' + elif ctype == CSelectionVector_Mode_UINT32: + return 'UINT32' + elif ctype == CSelectionVector_Mode_UINT64: + return 'UINT64' + else: + raise RuntimeError('Unexpected CSelectionVector_Mode value') + +cdef extern from "gandiva/condition.h" namespace "gandiva" nogil: + + cdef cppclass CCondition" gandiva::Condition": + c_string ToString() + shared_ptr[CNode] root() + shared_ptr[CField] result() + +cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil: + + ctypedef vector[shared_ptr[CArray]] CArrayVector" gandiva::ArrayVector" + + +cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil: + + cdef shared_ptr[CNode] TreeExprBuilder_MakeBoolLiteral \ + "gandiva::TreeExprBuilder::MakeLiteral"(c_bool value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt8Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(uint8_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt16Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(uint16_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt32Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(uint32_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt64Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(uint64_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInt8Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(int8_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInt16Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(int16_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInt32Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(int32_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInt64Literal \ + "gandiva::TreeExprBuilder::MakeLiteral"(int64_t value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeFloatLiteral \ + "gandiva::TreeExprBuilder::MakeLiteral"(float value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeDoubleLiteral \ + "gandiva::TreeExprBuilder::MakeLiteral"(double value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeStringLiteral \ + "gandiva::TreeExprBuilder::MakeStringLiteral"(const c_string& value) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeBinaryLiteral \ + "gandiva::TreeExprBuilder::MakeBinaryLiteral"(const c_string& value) + + cdef shared_ptr[CExpression] TreeExprBuilder_MakeExpression\ + "gandiva::TreeExprBuilder::MakeExpression"( + shared_ptr[CNode] root_node, shared_ptr[CField] result_field) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \ + "gandiva::TreeExprBuilder::MakeFunction"( + const c_string& name, const CNodeVector& children, + shared_ptr[CDataType] return_type) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeField \ + "gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \ + "gandiva::TreeExprBuilder::MakeIf"( + shared_ptr[CNode] condition, shared_ptr[CNode] this_node, + shared_ptr[CNode] else_node, shared_ptr[CDataType] return_type) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeAnd \ + "gandiva::TreeExprBuilder::MakeAnd"(const CNodeVector& children) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeOr \ + "gandiva::TreeExprBuilder::MakeOr"(const CNodeVector& children) + + cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \ + "gandiva::TreeExprBuilder::MakeCondition"( + shared_ptr[CNode] condition) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt32 \ + "gandiva::TreeExprBuilder::MakeInExpressionInt32"( + shared_ptr[CNode] node, const c_unordered_set[int32_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt64 \ + "gandiva::TreeExprBuilder::MakeInExpressionInt64"( + shared_ptr[CNode] node, const c_unordered_set[int64_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime32 \ + "gandiva::TreeExprBuilder::MakeInExpressionTime32"( + shared_ptr[CNode] node, const c_unordered_set[int32_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime64 \ + "gandiva::TreeExprBuilder::MakeInExpressionTime64"( + shared_ptr[CNode] node, const c_unordered_set[int64_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate32 \ + "gandiva::TreeExprBuilder::MakeInExpressionDate32"( + shared_ptr[CNode] node, const c_unordered_set[int32_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate64 \ + "gandiva::TreeExprBuilder::MakeInExpressionDate64"( + shared_ptr[CNode] node, const c_unordered_set[int64_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTimeStamp \ + "gandiva::TreeExprBuilder::MakeInExpressionTimeStamp"( + shared_ptr[CNode] node, const c_unordered_set[int64_t]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionString \ + "gandiva::TreeExprBuilder::MakeInExpressionString"( + shared_ptr[CNode] node, const c_unordered_set[c_string]& values) + + cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionBinary \ + "gandiva::TreeExprBuilder::MakeInExpressionBinary"( + shared_ptr[CNode] node, const c_unordered_set[c_string]& values) + +cdef extern from "gandiva/projector.h" namespace "gandiva" nogil: + + cdef cppclass CProjector" gandiva::Projector": + + CStatus Evaluate( + const CRecordBatch& batch, CMemoryPool* pool, + const CArrayVector* output) + + CStatus Evaluate( + const CRecordBatch& batch, + const CSelectionVector* selection, + CMemoryPool* pool, + const CArrayVector* output) + + c_string DumpIR() + + cdef CStatus Projector_Make \ + "gandiva::Projector::Make"( + shared_ptr[CSchema] schema, const CExpressionVector& children, + shared_ptr[CProjector]* projector) + + cdef CStatus Projector_Make \ + "gandiva::Projector::Make"( + shared_ptr[CSchema] schema, const CExpressionVector& children, + CSelectionVector_Mode mode, + shared_ptr[CConfiguration] configuration, + shared_ptr[CProjector]* projector) + +cdef extern from "gandiva/filter.h" namespace "gandiva" nogil: + + cdef cppclass CFilter" gandiva::Filter": + + CStatus Evaluate( + const CRecordBatch& batch, + shared_ptr[CSelectionVector] out_selection) + + c_string DumpIR() + + cdef CStatus Filter_Make \ + "gandiva::Filter::Make"( + shared_ptr[CSchema] schema, shared_ptr[CCondition] condition, + shared_ptr[CFilter]* filter) + +cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil: + + cdef cppclass CFunctionSignature" gandiva::FunctionSignature": + + CFunctionSignature(const c_string& base_name, + vector[shared_ptr[CDataType]] param_types, + shared_ptr[CDataType] ret_type) + + shared_ptr[CDataType] ret_type() const + + const c_string& base_name() const + + vector[shared_ptr[CDataType]] param_types() const + + c_string ToString() const + +cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil: + + cdef vector[shared_ptr[CFunctionSignature]] \ + GetRegisteredFunctionSignatures() + +cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil: + + cdef cppclass CConfiguration" gandiva::Configuration": + pass + + cdef cppclass CConfigurationBuilder \ + " gandiva::ConfigurationBuilder": + @staticmethod + shared_ptr[CConfiguration] DefaultConfiguration() diff --git a/src/arrow/python/pyarrow/includes/libplasma.pxd b/src/arrow/python/pyarrow/includes/libplasma.pxd new file mode 100644 index 000000000..d54e9f484 --- /dev/null +++ b/src/arrow/python/pyarrow/includes/libplasma.pxd @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.common cimport * + +cdef extern from "plasma/common.h" namespace "plasma" nogil: + cdef c_bool IsPlasmaObjectExists(const CStatus& status) + cdef c_bool IsPlasmaObjectNotFound(const CStatus& status) + cdef c_bool IsPlasmaStoreFull(const CStatus& status) |