summaryrefslogtreecommitdiffstats
path: root/src/arrow/python/pyarrow/_orc.pyx
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/python/pyarrow/_orc.pyx')
-rw-r--r--src/arrow/python/pyarrow/_orc.pyx163
1 files changed, 163 insertions, 0 deletions
diff --git a/src/arrow/python/pyarrow/_orc.pyx b/src/arrow/python/pyarrow/_orc.pyx
new file mode 100644
index 000000000..18ca28682
--- /dev/null
+++ b/src/arrow/python/pyarrow/_orc.pyx
@@ -0,0 +1,163 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+
+from cython.operator cimport dereference as deref
+from libcpp.vector cimport vector as std_vector
+from libcpp.utility cimport move
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.lib cimport (check_status, _Weakrefable,
+ MemoryPool, maybe_unbox_memory_pool,
+ Schema, pyarrow_wrap_schema,
+ KeyValueMetadata,
+ pyarrow_wrap_batch,
+ RecordBatch,
+ Table,
+ pyarrow_wrap_table,
+ pyarrow_unwrap_schema,
+ pyarrow_wrap_metadata,
+ pyarrow_unwrap_table,
+ get_reader,
+ get_writer)
+from pyarrow.lib import tobytes
+
+
+cdef class ORCReader(_Weakrefable):
+ cdef:
+ object source
+ CMemoryPool* allocator
+ unique_ptr[ORCFileReader] reader
+
+ def __cinit__(self, MemoryPool memory_pool=None):
+ self.allocator = maybe_unbox_memory_pool(memory_pool)
+
+ def open(self, object source, c_bool use_memory_map=True):
+ cdef:
+ shared_ptr[CRandomAccessFile] rd_handle
+
+ self.source = source
+
+ get_reader(source, use_memory_map, &rd_handle)
+ with nogil:
+ self.reader = move(GetResultValue(
+ ORCFileReader.Open(rd_handle, self.allocator)
+ ))
+
+ def metadata(self):
+ """
+ The arrow metadata for this file.
+
+ Returns
+ -------
+ metadata : pyarrow.KeyValueMetadata
+ """
+ cdef:
+ shared_ptr[const CKeyValueMetadata] sp_arrow_metadata
+
+ with nogil:
+ sp_arrow_metadata = GetResultValue(
+ deref(self.reader).ReadMetadata()
+ )
+
+ return pyarrow_wrap_metadata(sp_arrow_metadata)
+
+ def schema(self):
+ """
+ The arrow schema for this file.
+
+ Returns
+ -------
+ schema : pyarrow.Schema
+ """
+ cdef:
+ shared_ptr[CSchema] sp_arrow_schema
+
+ with nogil:
+ sp_arrow_schema = GetResultValue(deref(self.reader).ReadSchema())
+
+ return pyarrow_wrap_schema(sp_arrow_schema)
+
+ def nrows(self):
+ return deref(self.reader).NumberOfRows()
+
+ def nstripes(self):
+ return deref(self.reader).NumberOfStripes()
+
+ def read_stripe(self, n, columns=None):
+ cdef:
+ shared_ptr[CRecordBatch] sp_record_batch
+ RecordBatch batch
+ int64_t stripe
+ std_vector[c_string] c_names
+
+ stripe = n
+
+ if columns is None:
+ with nogil:
+ sp_record_batch = GetResultValue(
+ deref(self.reader).ReadStripe(stripe)
+ )
+ else:
+ c_names = [tobytes(name) for name in columns]
+ with nogil:
+ sp_record_batch = GetResultValue(
+ deref(self.reader).ReadStripe(stripe, c_names)
+ )
+
+ return pyarrow_wrap_batch(sp_record_batch)
+
+ def read(self, columns=None):
+ cdef:
+ shared_ptr[CTable] sp_table
+ std_vector[c_string] c_names
+
+ if columns is None:
+ with nogil:
+ sp_table = GetResultValue(deref(self.reader).Read())
+ else:
+ c_names = [tobytes(name) for name in columns]
+ with nogil:
+ sp_table = GetResultValue(deref(self.reader).Read(c_names))
+
+ return pyarrow_wrap_table(sp_table)
+
+cdef class ORCWriter(_Weakrefable):
+ cdef:
+ object source
+ unique_ptr[ORCFileWriter] writer
+ shared_ptr[COutputStream] rd_handle
+
+ def open(self, object source):
+ self.source = source
+ get_writer(source, &self.rd_handle)
+ with nogil:
+ self.writer = move(GetResultValue[unique_ptr[ORCFileWriter]](
+ ORCFileWriter.Open(self.rd_handle.get())))
+
+ def write(self, Table table):
+ cdef:
+ shared_ptr[CTable] sp_table
+ sp_table = pyarrow_unwrap_table(table)
+ with nogil:
+ check_status(deref(self.writer).Write(deref(sp_table)))
+
+ def close(self):
+ with nogil:
+ check_status(deref(self.writer).Close())