summaryrefslogtreecommitdiffstats
path: root/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/arrow-glib/orc-file-reader.cpp
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/arrow-glib/orc-file-reader.cpp')
-rw-r--r--src/arrow/c_glib/arrow-glib/orc-file-reader.cpp454
1 files changed, 454 insertions, 0 deletions
diff --git a/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp b/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp
new file mode 100644
index 000000000..084198a82
--- /dev/null
+++ b/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp
@@ -0,0 +1,454 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/input-stream.hpp>
+#include <arrow-glib/orc-file-reader.hpp>
+#include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/schema.hpp>
+#include <arrow-glib/table.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: orc-file-reader
+ * @section_id: orc-file-reader
+ * @title: ORC reader
+ * @include: arrow-glib/orc-file-reader.h
+ *
+ * #GArrowORCFileReader is a class for reading stripes in ORC file
+ * format from input.
+ */
+
+typedef struct GArrowORCFileReaderPrivate_ {
+ GArrowSeekableInputStream *input;
+ arrow::adapters::orc::ORCFileReader *orc_file_reader;
+ GArray *field_indices;
+} GArrowORCFileReaderPrivate;
+
+enum {
+ PROP_0,
+ PROP_INPUT,
+ PROP_ORC_FILE_READER
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader,
+ garrow_orc_file_reader,
+ G_TYPE_OBJECT);
+
+#define GARROW_ORC_FILE_READER_GET_PRIVATE(obj) \
+ static_cast<GArrowORCFileReaderPrivate *>( \
+ garrow_orc_file_reader_get_instance_private( \
+ GARROW_ORC_FILE_READER(obj)))
+
+static void
+garrow_orc_file_reader_dispose(GObject *object)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+ if (priv->input) {
+ g_object_unref(priv->input);
+ priv->input = NULL;
+ }
+
+ G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object);
+}
+
+static void
+garrow_orc_file_reader_finalize(GObject *object)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+ delete priv->orc_file_reader;
+
+ if (priv->field_indices) {
+ g_array_free(priv->field_indices, TRUE);
+ }
+
+ G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object);
+}
+
+static void
+garrow_orc_file_reader_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+ switch (prop_id) {
+ case PROP_INPUT:
+ priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value));
+ break;
+ case PROP_ORC_FILE_READER:
+ priv->orc_file_reader =
+ static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_orc_file_reader_get_property(GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+ switch (prop_id) {
+ case PROP_INPUT:
+ g_value_set_object(value, priv->input);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_orc_file_reader_init(GArrowORCFileReader *object)
+{
+}
+
+static void
+garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->dispose = garrow_orc_file_reader_dispose;
+ gobject_class->finalize = garrow_orc_file_reader_finalize;
+ gobject_class->set_property = garrow_orc_file_reader_set_property;
+ gobject_class->get_property = garrow_orc_file_reader_get_property;
+
+ GParamSpec *spec;
+ spec = g_param_spec_object("input",
+ "Input",
+ "The input stream",
+ GARROW_TYPE_SEEKABLE_INPUT_STREAM,
+ static_cast<GParamFlags>(G_PARAM_READWRITE |
+ G_PARAM_CONSTRUCT_ONLY));
+ g_object_class_install_property(gobject_class, PROP_INPUT, spec);
+
+ spec = g_param_spec_pointer("orc-file-reader",
+ "arrow::adapters::orc::ORCFileReader",
+ "The raw arrow::adapters::orc::ORCFileReader *",
+ static_cast<GParamFlags>(G_PARAM_WRITABLE |
+ G_PARAM_CONSTRUCT_ONLY));
+ g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec);
+}
+
+
+/**
+ * garrow_orc_file_reader_new:
+ * @file: The file to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowORCFileReader
+ * or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowORCFileReader *
+garrow_orc_file_reader_new(GArrowSeekableInputStream *input,
+ GError **error)
+{
+ auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input);
+ auto pool = arrow::default_memory_pool();
+ auto arrow_reader_result =
+ arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file,
+ pool);
+ if (garrow::check(error, arrow_reader_result, "[orc-file-reader][new]")) {
+ return garrow_orc_file_reader_new_raw(input,
+ (*arrow_reader_result).release());
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * garrow_orc_file_reader_set_field_indexes:
+ * @reader: A #GArrowORCFileReader.
+ * @field_indexes: (nullable) (array length=n_field_indexes):
+ * The field indexes to be read.
+ * @n_field_indexes: The number of the specified indexes.
+ *
+ * Since: 0.10.0
+ *
+ * Deprecated: 0.12.0:
+ * Use garrow_orc_file_reader_set_field_indices() instead.
+ */
+void
+garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
+ const gint *field_indexes,
+ guint n_field_indexes)
+{
+ garrow_orc_file_reader_set_field_indices(reader,
+ field_indexes,
+ n_field_indexes);
+}
+
+/**
+ * garrow_orc_file_reader_set_field_indices:
+ * @reader: A #GArrowORCFileReader.
+ * @field_indices: (nullable) (array length=n_field_indices):
+ * The field indices to be read.
+ * @n_field_indices: The number of the specified indices.
+ *
+ * Since: 0.12.0
+ */
+void
+garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader,
+ const gint *field_indices,
+ guint n_field_indices)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+ if (priv->field_indices) {
+ g_array_free(priv->field_indices, TRUE);
+ }
+ if (n_field_indices == 0) {
+ priv->field_indices = NULL;
+ } else {
+ priv->field_indices = g_array_sized_new(FALSE,
+ FALSE,
+ sizeof(gint),
+ n_field_indices);
+ g_array_append_vals(priv->field_indices, field_indices, n_field_indices);
+ }
+}
+
+/**
+ * garrow_orc_file_reader_get_field_indexes:
+ * @reader: A #GArrowORCFileReader.
+ * @n_field_indexes: The number of the specified indexes.
+ *
+ * Returns: (nullable) (array length=n_field_indexes) (transfer none):
+ * The field indexes to be read.
+ *
+ * Since: 0.10.0
+ *
+ * Deprecated: 0.12.0:
+ * Use garrow_orc_file_reader_get_field_indices() instead.
+ */
+const gint *
+garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
+ guint *n_field_indexes)
+{
+ return garrow_orc_file_reader_get_field_indices(reader, n_field_indexes);
+}
+
+/**
+ * garrow_orc_file_reader_get_field_indices:
+ * @reader: A #GArrowORCFileReader.
+ * @n_field_indices: The number of the specified indices.
+ *
+ * Returns: (nullable) (array length=n_field_indices) (transfer none):
+ * The field indices to be read.
+ *
+ * Since: 0.12.0
+ */
+const gint *
+garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader,
+ guint *n_field_indices)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+ if (priv->field_indices) {
+ *n_field_indices = priv->field_indices->len;
+ return reinterpret_cast<gint *>(priv->field_indices->data);
+ } else {
+ *n_field_indices = 0;
+ return NULL;
+ }
+}
+
+/**
+ * garrow_orc_file_reader_read_type:
+ * @reader: A #GArrowORCFileReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly read type as
+ * #GArrowSchema or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowSchema *
+garrow_orc_file_reader_read_type(GArrowORCFileReader *reader,
+ GError **error)
+{
+ auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+ auto arrow_schema_result = arrow_reader->ReadSchema();
+ if (garrow::check(error,
+ arrow_schema_result,
+ "[orc-file-reader][read-type]")) {
+ auto arrow_schema = *arrow_schema_result;
+ return garrow_schema_new_raw(&arrow_schema);
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * garrow_orc_file_reader_read_stripes:
+ * @reader: A #GArrowORCFileReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly read stripes as
+ * #GArrowTable or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowTable *
+garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader,
+ GError **error)
+{
+ auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+ if (priv->field_indices) {
+ std::vector<int> arrow_field_indices;
+ auto field_indices = priv->field_indices;
+ for (guint i = 0; i < field_indices->len; ++i) {
+ arrow_field_indices.push_back(g_array_index(field_indices, gint, i));
+ }
+ auto arrow_table_result = arrow_reader->Read(arrow_field_indices);
+ if (garrow::check(error,
+ arrow_table_result,
+ "[orc-file-reader][read-stripes]")) {
+ auto arrow_table = *arrow_table_result;
+ return garrow_table_new_raw(&arrow_table);
+ } else {
+ return NULL;
+ }
+ } else {
+ auto arrow_table_result = arrow_reader->Read();
+ if (garrow::check(error,
+ arrow_table_result,
+ "[orc-file-reader][read-stripes]")) {
+ auto arrow_table = *arrow_table_result;
+ return garrow_table_new_raw(&arrow_table);
+ } else {
+ return NULL;
+ }
+ }
+}
+
+/**
+ * garrow_orc_file_reader_read_stripe:
+ * @reader: A #GArrowORCFileReader.
+ * @i: The stripe index to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly read stripe as
+ * #GArrowRecordBatch or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowRecordBatch *
+garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
+ gint64 i,
+ GError **error)
+{
+ auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+ if (i < 0) {
+ i += arrow_reader->NumberOfStripes();
+ }
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+ if (priv->field_indices) {
+ std::vector<int> arrow_field_indices;
+ auto field_indices = priv->field_indices;
+ for (guint j = 0; j < field_indices->len; ++j) {
+ arrow_field_indices.push_back(g_array_index(field_indices, gint, j));
+ }
+ std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
+ auto arrow_record_batch_result =
+ arrow_reader->ReadStripe(i, arrow_field_indices);
+ if (garrow::check(error,
+ arrow_record_batch_result,
+ "[orc-file-reader][read-stripe]")) {
+ auto arrow_record_batch = *arrow_record_batch_result;
+ return garrow_record_batch_new_raw(&arrow_record_batch);
+ } else {
+ return NULL;
+ }
+ } else {
+ auto arrow_record_batch_result = arrow_reader->ReadStripe(i);
+ if (garrow::check(error,
+ arrow_record_batch_result,
+ "[orc-file-reader][read-stripe]")) {
+ auto arrow_record_batch = *arrow_record_batch_result;
+ return garrow_record_batch_new_raw(&arrow_record_batch);
+ } else {
+ return NULL;
+ }
+ }
+}
+
+/**
+ * garrow_orc_file_reader_get_n_stripes:
+ * @reader: A #GArrowORCFileReader.
+ *
+ * Returns: The number of stripes in the file.
+ *
+ * Since: 0.10.0
+ */
+gint64
+garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader)
+{
+ auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+ return arrow_reader->NumberOfStripes();
+}
+
+/**
+ * garrow_orc_file_reader_get_n_rows:
+ * @reader: A #GArrowORCFileReader.
+ *
+ * Returns: The number of rows in the file.
+ *
+ * Since: 0.10.0
+ */
+gint64
+garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader)
+{
+ auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+ return arrow_reader->NumberOfRows();
+}
+
+
+G_END_DECLS
+
+
+GArrowORCFileReader *
+garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input,
+ arrow::adapters::orc::ORCFileReader *arrow_reader)
+{
+ auto reader =
+ GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER,
+ "input", input,
+ "orc-file-reader", arrow_reader,
+ NULL));
+ return reader;
+}
+
+arrow::adapters::orc::ORCFileReader *
+garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader)
+{
+ auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+ return priv->orc_file_reader;
+}