summaryrefslogtreecommitdiffstats
path: root/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/arrow-dataset-glib/scanner.cpp')
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/scanner.cpp351
1 files changed, 351 insertions, 0 deletions
diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
new file mode 100644
index 000000000..51542bb0a
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/expression.hpp>
+#include <arrow-glib/reader.hpp>
+#include <arrow-glib/table.hpp>
+
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/scanner.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: scanner
+ * @section_id: scanner
+ * @title: Scanner related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetScanner is a class for scanning dataset.
+ *
+ * #GADatasetScannerBuilder is a class for building a scanner.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GADatasetScannerPrivate_ {
+ std::shared_ptr<arrow::dataset::Scanner> scanner;
+} GADatasetScannerPrivate;
+
+enum {
+ PROP_SCANNER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanner,
+ gadataset_scanner,
+ G_TYPE_OBJECT)
+
+#define GADATASET_SCANNER_GET_PRIVATE(obj) \
+ static_cast<GADatasetScannerPrivate *>( \
+ gadataset_scanner_get_instance_private( \
+ GADATASET_SCANNER(obj)))
+
+static void
+gadataset_scanner_finalize(GObject *object)
+{
+ auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+ priv->scanner.~shared_ptr();
+ G_OBJECT_CLASS(gadataset_scanner_parent_class)->finalize(object);
+}
+
+static void
+gadataset_scanner_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+
+ switch (prop_id) {
+ case PROP_SCANNER:
+ priv->scanner =
+ *static_cast<std::shared_ptr<arrow::dataset::Scanner> *>(
+ g_value_get_pointer(value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+gadataset_scanner_init(GADatasetScanner *object)
+{
+ auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+ new(&priv->scanner) std::shared_ptr<arrow::dataset::Scanner>;
+}
+
+static void
+gadataset_scanner_class_init(GADatasetScannerClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+ gobject_class->finalize = gadataset_scanner_finalize;
+ gobject_class->set_property = gadataset_scanner_set_property;
+
+ GParamSpec *spec;
+ spec = g_param_spec_pointer("scanner",
+ "Scanner",
+ "The raw std::shared<arrow::dataset::Scanner> *",
+ static_cast<GParamFlags>(G_PARAM_WRITABLE |
+ G_PARAM_CONSTRUCT_ONLY));
+ g_object_class_install_property(gobject_class, PROP_SCANNER, spec);
+}
+
+/**
+ * gadataset_scanner_to_table:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A newly created #GArrowTable on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowTable *
+gadataset_scanner_to_table(GADatasetScanner *scanner,
+ GError **error)
+{
+ auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+ auto arrow_table_result = arrow_scanner->ToTable();
+ if (garrow::check(error, arrow_table_result, "[scanner][to-table]")) {
+ auto arrow_table = *arrow_table_result;
+ return garrow_table_new_raw(&arrow_table);
+ } else {
+ return NULL;
+ }
+}
+
+
+typedef struct GADatasetScannerBuilderPrivate_ {
+ std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder;
+} GADatasetScannerBuilderPrivate;
+
+enum {
+ PROP_SCANNER_BUILDER = 1,
+ PROP_USE_ASYNC,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScannerBuilder,
+ gadataset_scanner_builder,
+ G_TYPE_OBJECT)
+
+#define GADATASET_SCANNER_BUILDER_GET_PRIVATE(obj) \
+ static_cast<GADatasetScannerBuilderPrivate *>( \
+ gadataset_scanner_builder_get_instance_private( \
+ GADATASET_SCANNER_BUILDER(obj)))
+
+static void
+gadataset_scanner_builder_finalize(GObject *object)
+{
+ auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+ priv->scanner_builder.~shared_ptr();
+ G_OBJECT_CLASS(gadataset_scanner_builder_parent_class)->finalize(object);
+}
+
+static void
+gadataset_scanner_builder_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+
+ switch (prop_id) {
+ case PROP_SCANNER_BUILDER:
+ priv->scanner_builder =
+ *static_cast<std::shared_ptr<arrow::dataset::ScannerBuilder> *>(
+ g_value_get_pointer(value));
+ break;
+ case PROP_USE_ASYNC:
+ garrow::check(nullptr,
+ priv->scanner_builder->UseAsync(g_value_get_boolean(value)),
+ "[scanner-builder][use-async][set]");
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+gadataset_scanner_builder_init(GADatasetScannerBuilder *object)
+{
+ auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+ new(&priv->scanner_builder) std::shared_ptr<arrow::dataset::ScannerBuilder>;
+}
+
+static void
+gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+ gobject_class->finalize = gadataset_scanner_builder_finalize;
+ gobject_class->set_property = gadataset_scanner_builder_set_property;
+
+ GParamSpec *spec;
+ spec = g_param_spec_pointer("scanner-builder",
+ "Scanner builder",
+ "The raw "
+ "std::shared<arrow::dataset::ScannerBuilder> *",
+ static_cast<GParamFlags>(G_PARAM_WRITABLE |
+ G_PARAM_CONSTRUCT_ONLY));
+ g_object_class_install_property(gobject_class, PROP_SCANNER_BUILDER, spec);
+
+ arrow::dataset::ScanOptions default_options;
+ /**
+ * GADatasetScannerBuilder:use-async:
+ *
+ * Whether or not async mode is used.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_boolean("use-async",
+ "Use async",
+ "Whether or not async mode is used",
+ default_options.use_async,
+ static_cast<GParamFlags>(G_PARAM_WRITABLE));
+ g_object_class_install_property(gobject_class, PROP_USE_ASYNC, spec);
+}
+
+/**
+ * gadataset_scanner_builder_new:
+ * @dataset: A #GADatasetDataset to be scanned.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GADatasetScannerBuilder on success,
+ * %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error)
+{
+ auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+ auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+ if (garrow::check(error,
+ arrow_scanner_builder_result,
+ "[scanner-builder][new]")) {
+ auto arrow_scanner_builder = *arrow_scanner_builder_result;
+ return gadataset_scanner_builder_new_raw(&arrow_scanner_builder);
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * gadataset_scanner_builder_new_record_batch_reader:
+ * @reader: A #GArrowRecordBatchReader that produces record batches.
+ *
+ * Returns: (nullable): A newly created #GADatasetScannerBuilder.
+ *
+ * Since: 6.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_record_batch_reader(
+ GArrowRecordBatchReader *reader)
+{
+ auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+ auto arrow_scanner_builder =
+ arrow::dataset::ScannerBuilder::FromRecordBatchReader(arrow_reader);
+ return gadataset_scanner_builder_new_raw(&arrow_scanner_builder);
+}
+
+/**
+ * gadataset_scanner_builder_set_filter:
+ * @builder: A #GADatasetScannerBuilder.
+ * @expression: A #GArrowExpression to filter rows with.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder,
+ GArrowExpression *expression,
+ GError **error)
+{
+ auto arrow_builder = gadataset_scanner_builder_get_raw(builder);
+ auto arrow_expression = garrow_expression_get_raw(expression);
+ return garrow::check(error,
+ arrow_builder->Filter(*arrow_expression),
+ "[scanner-builder][filter][set]");
+}
+
+/**
+ * gadataset_scanner_builder_finish:
+ * @builder: A #GADatasetScannerBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A newly created #GADatasetScanner on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScanner *
+gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder,
+ GError **error)
+{
+ auto arrow_builder = gadataset_scanner_builder_get_raw(builder);
+ auto arrow_scanner_result = arrow_builder->Finish();
+ if (garrow::check(error, arrow_scanner_result, "[scanner-builder][finish]")) {
+ auto arrow_scanner = *arrow_scanner_result;
+ return gadataset_scanner_new_raw(&arrow_scanner);
+ } else {
+ return NULL;
+ }
+}
+
+
+G_END_DECLS
+
+GADatasetScanner *
+gadataset_scanner_new_raw(
+ std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner)
+{
+ auto scanner =
+ GADATASET_SCANNER(g_object_new(GADATASET_TYPE_SCANNER,
+ "scanner", arrow_scanner,
+ NULL));
+ return scanner;
+}
+
+std::shared_ptr<arrow::dataset::Scanner>
+gadataset_scanner_get_raw(GADatasetScanner *scanner)
+{
+ auto priv = GADATASET_SCANNER_GET_PRIVATE(scanner);
+ return priv->scanner;
+}
+
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_raw(
+ std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder)
+{
+ return GADATASET_SCANNER_BUILDER(
+ g_object_new(GADATASET_TYPE_SCANNER_BUILDER,
+ "scanner-builder", arrow_scanner_builder,
+ NULL));
+}
+
+std::shared_ptr<arrow::dataset::ScannerBuilder>
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder)
+{
+ auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(scanner_builder);
+ return priv->scanner_builder;
+}