summaryrefslogtreecommitdiffstats
path: root/src/arrow/c_glib/arrow-dataset-glib
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/arrow-dataset-glib
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/arrow-dataset-glib')
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h30
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp29
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp552
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h98
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp27
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset.cpp736
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset.h90
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/dataset.hpp46
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/enums.c.template52
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/enums.h.template41
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/file-format.cpp574
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/file-format.h146
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/file-format.hpp44
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/fragment.cpp187
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/fragment.h59
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/fragment.hpp35
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/meson.build104
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp440
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/partitioning.h110
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp31
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/scanner.cpp351
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/scanner.h74
-rw-r--r--src/arrow/c_glib/arrow-dataset-glib/scanner.hpp37
23 files changed, 3893 insertions, 0 deletions
diff --git a/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h
new file mode 100644
index 000000000..58f4e216c
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+#include <arrow-dataset-glib/dataset-factory.h>
+#include <arrow-dataset-glib/dataset.h>
+#include <arrow-dataset-glib/enums.h>
+#include <arrow-dataset-glib/file-format.h>
+#include <arrow-dataset-glib/fragment.h>
+#include <arrow-dataset-glib/partitioning.h>
+#include <arrow-dataset-glib/scanner.h>
diff --git a/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
new file mode 100644
index 000000000..8e9965068
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.hpp>
+
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/file-format.hpp>
+#include <arrow-dataset-glib/fragment.hpp>
+#include <arrow-dataset-glib/partitioning.hpp>
+#include <arrow-dataset-glib/scanner.hpp>
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp
new file mode 100644
index 000000000..1e532760a
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp
@@ -0,0 +1,552 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/file-system.hpp>
+
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/file-format.hpp>
+#include <arrow-dataset-glib/partitioning.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: dataset-factory
+ * @section_id: dataset-factory
+ * @title: Dataset factory related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetDatasetFactory is a base class for dataset factories.
+ *
+ * #GADatasetFileSystemDatasetFactory is a factory class that creates
+ * #GADatasetFileSystemDataset.
+ *
+ * Since: 5.0.0
+ */
+
+/* Instance-private data of GADatasetDatasetFactory.
+ *
+ * The C++ member is constructed with placement new in
+ * gadataset_dataset_factory_init() and destroyed by an explicit
+ * destructor call in gadataset_dataset_factory_finalize(). */
+typedef struct GADatasetDatasetFactoryPrivate_ {
+ std::shared_ptr<arrow::dataset::DatasetFactory> factory;
+} GADatasetDatasetFactoryPrivate;
+
+/* Property IDs of GADatasetDatasetFactory. */
+enum {
+ PROP_DATASET_FACTORY = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDatasetFactory,
+ gadataset_dataset_factory,
+ G_TYPE_OBJECT)
+
+/* Evaluates to the GADatasetDatasetFactoryPrivate of obj. */
+#define GADATASET_DATASET_FACTORY_GET_PRIVATE(obj) \
+ static_cast<GADatasetDatasetFactoryPrivate *>( \
+ gadataset_dataset_factory_get_instance_private( \
+ GADATASET_DATASET_FACTORY(obj)))
+
+/* GObject::finalize implementation: destroys the shared pointer held in
+ * the private data and then chains up to the parent class. */
+static void
+gadataset_dataset_factory_finalize(GObject *object)
+{
+  using ArrowFactoryPointer = std::shared_ptr<arrow::dataset::DatasetFactory>;
+
+  auto factory_priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+  // The member was created with placement new, so its destructor has to
+  // be run by hand.
+  factory_priv->factory.~ArrowFactoryPointer();
+
+  auto parent_class = G_OBJECT_CLASS(gadataset_dataset_factory_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject::set_property implementation.
+ *
+ * The only property is "dataset-factory": a pointer to a
+ * std::shared_ptr<arrow::dataset::DatasetFactory> that is copied into the
+ * private data. A NULL pointer leaves the stored value untouched. */
+static void
+gadataset_dataset_factory_set_property(GObject *object,
+                                       guint prop_id,
+                                       const GValue *value,
+                                       GParamSpec *pspec)
+{
+  using ArrowFactoryPointer = std::shared_ptr<arrow::dataset::DatasetFactory>;
+
+  auto factory_priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+
+  if (prop_id != PROP_DATASET_FACTORY) {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    return;
+  }
+
+  auto raw_pointer =
+    static_cast<ArrowFactoryPointer *>(g_value_get_pointer(value));
+  if (raw_pointer) {
+    factory_priv->factory = *raw_pointer;
+  }
+}
+
+/* Instance initializer: constructs an empty shared pointer in place
+ * inside the private data. */
+static void
+gadataset_dataset_factory_init(GADatasetDatasetFactory *object)
+{
+  using ArrowFactoryPointer = std::shared_ptr<arrow::dataset::DatasetFactory>;
+
+  auto factory_priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+  new(&(factory_priv->factory)) ArrowFactoryPointer;
+}
+
+/* Class initializer: installs the finalize/set_property handlers and the
+ * write-only, construct-only "dataset-factory" pointer property. */
+static void
+gadataset_dataset_factory_class_init(GADatasetDatasetFactoryClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_dataset_factory_finalize;
+  gobject_class->set_property = gadataset_dataset_factory_set_property;
+
+  GParamSpec *spec;
+  // The blurb names the type that set_property actually casts the
+  // pointer to: std::shared_ptr, not the non-existent std::shared.
+  spec = g_param_spec_pointer("dataset-factory",
+                              "Dataset factory",
+                              "The raw "
+                              "std::shared_ptr<arrow::dataset::DatasetFactory> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DATASET_FACTORY, spec);
+}
+
+/**
+ * gadataset_dataset_factory_finish:
+ * @factory: A #GADatasetDatasetFactory.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Calls Finish() on the wrapped Arrow dataset factory and wraps the
+ * resulting dataset.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetDataset on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetDataset *
+gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory,
+                                 GError **error)
+{
+  auto arrow_factory = gadataset_dataset_factory_get_raw(factory);
+  auto finish_result = arrow_factory->Finish();
+  if (!garrow::check(error, finish_result, "[dataset-factory][finish]")) {
+    return NULL;
+  }
+  auto arrow_dataset = *finish_result;
+  return gadataset_dataset_new_raw(&arrow_dataset);
+}
+
+
+/* Instance-private data of GADatasetFileSystemDatasetFactory. */
+typedef struct GADatasetFileSystemDatasetFactoryPrivate_ {
+ /* Reference taken with g_value_dup_object() in set_property and
+  * released in dispose. */
+ GADatasetFileFormat *format;
+ /* Referenced file system; released in dispose. */
+ GArrowFileSystem *file_system;
+ /* Referenced partitioning; released in dispose. */
+ GADatasetPartitioning *partitioning;
+ /* File info objects added with g_list_prepend(), so the most recently
+  * added entry comes first. Elements are unreffed in dispose. */
+ GList *files;
+ /* C++ object: constructed with placement new in init and destroyed
+  * explicitly in finalize. */
+ arrow::dataset::FileSystemFactoryOptions options;
+} GADatasetFileSystemDatasetFactoryPrivate;
+
+/* Property IDs of GADatasetFileSystemDatasetFactory. */
+enum {
+ PROP_FORMAT = 1,
+ PROP_FILE_SYSTEM,
+ PROP_PARTITIONING,
+ PROP_PARTITION_BASE_DIR,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetFactory,
+ gadataset_file_system_dataset_factory,
+ GADATASET_TYPE_DATASET_FACTORY)
+
+/* Evaluates to the GADatasetFileSystemDatasetFactoryPrivate of obj. */
+#define GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileSystemDatasetFactoryPrivate *>( \
+ gadataset_file_system_dataset_factory_get_instance_private( \
+ GADATASET_FILE_SYSTEM_DATASET_FACTORY(obj)))
+
+/* GObject::dispose implementation: drops the references held on the
+ * format, file system, partitioning and every listed file, resets each
+ * slot to NULL, then chains up to the parent class. */
+static void
+gadataset_file_system_dataset_factory_dispose(GObject *object)
+{
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  if (factory_priv->format != NULL) {
+    g_object_unref(factory_priv->format);
+    factory_priv->format = NULL;
+  }
+
+  if (factory_priv->file_system != NULL) {
+    g_object_unref(factory_priv->file_system);
+    factory_priv->file_system = NULL;
+  }
+
+  if (factory_priv->partitioning != NULL) {
+    g_object_unref(factory_priv->partitioning);
+    factory_priv->partitioning = NULL;
+  }
+
+  if (factory_priv->files != NULL) {
+    // Frees the list nodes and unrefs every element.
+    g_list_free_full(factory_priv->files, g_object_unref);
+    factory_priv->files = NULL;
+  }
+
+  auto parent_class =
+    G_OBJECT_CLASS(gadataset_file_system_dataset_factory_parent_class);
+  parent_class->dispose(object);
+}
+
+/* GObject::finalize implementation: runs the destructor of the C++
+ * options object stored in the private data, then chains up. */
+static void
+gadataset_file_system_dataset_factory_finalize(GObject *object)
+{
+  using ArrowOptions = arrow::dataset::FileSystemFactoryOptions;
+
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+  // Constructed with placement new in the instance initializer.
+  factory_priv->options.~ArrowOptions();
+
+  auto parent_class =
+    G_OBJECT_CLASS(gadataset_file_system_dataset_factory_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject::set_property implementation for
+ * GADatasetFileSystemDatasetFactory.
+ *
+ * - "format": takes a new reference on the given object.
+ * - "partitioning": swaps the referenced object and mirrors it into the
+ *   Arrow options; NULL resets the options to
+ *   arrow::dataset::Partitioning::Default().
+ * - "partition-base-dir": copied into the Arrow options; NULL is stored
+ *   as an empty string.
+ */
+static void
+gadataset_file_system_dataset_factory_set_property(GObject *object,
+                                                   guint prop_id,
+                                                   const GValue *value,
+                                                   GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FORMAT:
+    priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value));
+    break;
+  case PROP_PARTITIONING:
+    {
+      auto partitioning = g_value_get_object(value);
+      if (partitioning == priv->partitioning) {
+        break;
+      }
+      auto old_partitioning = priv->partitioning;
+      if (partitioning) {
+        g_object_ref(partitioning);
+        priv->partitioning = GADATASET_PARTITIONING(partitioning);
+        priv->options.partitioning =
+          gadataset_partitioning_get_raw(priv->partitioning);
+      } else {
+        // Forget the old object before it is unreffed below; otherwise
+        // the getter, finish() and dispose() would keep using a pointer
+        // whose reference has already been released.
+        priv->partitioning = NULL;
+        priv->options.partitioning = arrow::dataset::Partitioning::Default();
+      }
+      if (old_partitioning) {
+        g_object_unref(old_partitioning);
+      }
+    }
+    break;
+  case PROP_PARTITION_BASE_DIR:
+    {
+      // The string may be NULL (it is the property's default value) and
+      // std::string must not be assigned from a null pointer.
+      auto base_dir = g_value_get_string(value);
+      if (base_dir) {
+        priv->options.partition_base_dir = base_dir;
+      } else {
+        priv->options.partition_base_dir.clear();
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject::get_property implementation: exposes the stored format, file
+ * system and partitioning objects and the partition base directory kept
+ * in the Arrow options. */
+static void
+gadataset_file_system_dataset_factory_get_property(GObject *object,
+                                                   guint prop_id,
+                                                   GValue *value,
+                                                   GParamSpec *pspec)
+{
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  if (prop_id == PROP_FORMAT) {
+    g_value_set_object(value, factory_priv->format);
+  } else if (prop_id == PROP_FILE_SYSTEM) {
+    g_value_set_object(value, factory_priv->file_system);
+  } else if (prop_id == PROP_PARTITIONING) {
+    g_value_set_object(value, factory_priv->partitioning);
+  } else if (prop_id == PROP_PARTITION_BASE_DIR) {
+    const auto &base_dir = factory_priv->options.partition_base_dir;
+    g_value_set_string(value, base_dir.c_str());
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+  }
+}
+
+/* Instance initializer: default-constructs the Arrow factory options in
+ * place inside the private data. */
+static void
+gadataset_file_system_dataset_factory_init(
+  GADatasetFileSystemDatasetFactory *object)
+{
+  using ArrowOptions = arrow::dataset::FileSystemFactoryOptions;
+
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+  new(&(factory_priv->options)) ArrowOptions;
+}
+
+/* Class initializer: wires up the GObject virtual functions and installs
+ * the "format", "file-system", "partitioning" and "partition-base-dir"
+ * properties. */
+static void
+gadataset_file_system_dataset_factory_class_init(
+  GADatasetFileSystemDatasetFactoryClass *klass)
+{
+  auto object_class = G_OBJECT_CLASS(klass);
+  object_class->dispose = gadataset_file_system_dataset_factory_dispose;
+  object_class->finalize = gadataset_file_system_dataset_factory_finalize;
+  object_class->set_property = gadataset_file_system_dataset_factory_set_property;
+  object_class->get_property = gadataset_file_system_dataset_factory_get_property;
+
+  const auto read_only = static_cast<GParamFlags>(G_PARAM_READABLE);
+  const auto read_write = static_cast<GParamFlags>(G_PARAM_READWRITE);
+  const auto read_write_construct_only =
+    static_cast<GParamFlags>(G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY);
+
+  /**
+   * GADatasetFileSystemDatasetFactory:format:
+   *
+   * Format passed to #GADatasetFileSystemDataset.
+   *
+   * Since: 5.0.0
+   */
+  auto format_spec =
+    g_param_spec_object("format",
+                        "Format",
+                        "Format passed to GADatasetFileSystemDataset",
+                        GADATASET_TYPE_FILE_FORMAT,
+                        read_write_construct_only);
+  g_object_class_install_property(object_class, PROP_FORMAT, format_spec);
+
+  /**
+   * GADatasetFileSystemDatasetFactory:file-system:
+   *
+   * File system passed to #GADatasetFileSystemDataset.
+   *
+   * Since: 5.0.0
+   */
+  auto file_system_spec =
+    g_param_spec_object("file-system",
+                        "File system",
+                        "File system passed to GADatasetFileSystemDataset",
+                        GARROW_TYPE_FILE_SYSTEM,
+                        read_only);
+  g_object_class_install_property(object_class,
+                                  PROP_FILE_SYSTEM,
+                                  file_system_spec);
+
+  /**
+   * GADatasetFileSystemDatasetFactory:partitioning:
+   *
+   * Partitioning used by #GADatasetFileSystemDataset.
+   *
+   * Since: 6.0.0
+   */
+  auto partitioning_spec =
+    g_param_spec_object("partitioning",
+                        "Partitioning",
+                        "Partitioning used by GADatasetFileSystemDataset",
+                        GADATASET_TYPE_PARTITIONING,
+                        read_write);
+  g_object_class_install_property(object_class,
+                                  PROP_PARTITIONING,
+                                  partitioning_spec);
+
+  /**
+   * GADatasetFileSystemDatasetFactory:partition-base-dir:
+   *
+   * Partition base directory used by #GADatasetFileSystemDataset.
+   *
+   * Since: 6.0.0
+   */
+  auto partition_base_dir_spec =
+    g_param_spec_string("partition-base-dir",
+                        "Partition base directory",
+                        "Partition base directory "
+                        "used by GADatasetFileSystemDataset",
+                        NULL,
+                        read_write);
+  g_object_class_install_property(object_class,
+                                  PROP_PARTITION_BASE_DIR,
+                                  partition_base_dir_spec);
+}
+
+/**
+ * gadataset_file_system_dataset_factory_new:
+ * @format: A #GADatasetFileFormat.
+ *
+ * Creates the factory with g_object_new(), passing @format as the
+ * construct-only "format" property.
+ *
+ * Returns: A newly created #GADatasetFileSystemDatasetFactory.
+ *
+ * Since: 5.0.0
+ */
+GADatasetFileSystemDatasetFactory *
+gadataset_file_system_dataset_factory_new(GADatasetFileFormat *format)
+{
+ return GADATASET_FILE_SYSTEM_DATASET_FACTORY(
+ g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY,
+ "format", format,
+ NULL));
+}
+
+/**
+ * gadataset_file_system_dataset_factory_set_file_system:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @file_system: A #GArrowFileSystem.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Stores a new reference to @file_system in @factory. The file system
+ * can be set only once; a second attempt fails with an error.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_set_file_system(
+  GADatasetFileSystemDatasetFactory *factory,
+  GArrowFileSystem *file_system,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][set-file-system]";
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (factory_priv->file_system != NULL) {
+    auto already_set = arrow::Status::Invalid("file system is already set");
+    garrow::check(error, already_set, context);
+    return FALSE;
+  }
+  g_object_ref(file_system);
+  factory_priv->file_system = file_system;
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_set_file_system_uri:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @uri: An URI for file system.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Resolves @uri into a file system and a path inside it, stores the
+ * file system in @factory and adds the file info of that path to the
+ * factory's file list. The file system can be set only once.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_set_file_system_uri(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *uri,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][set-file-system-uri]";
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (factory_priv->file_system != NULL) {
+    auto already_set = arrow::Status::Invalid("file system is already set");
+    garrow::check(error, already_set, context);
+    return FALSE;
+  }
+
+  // Filled by FileSystemFromUri() with the path part of the URI.
+  std::string path_in_file_system;
+  auto file_system_result =
+    arrow::fs::FileSystemFromUri(uri, &path_in_file_system);
+  if (!garrow::check(error, file_system_result, context)) {
+    return FALSE;
+  }
+  auto arrow_file_system = *file_system_result;
+
+  auto file_info_result = arrow_file_system->GetFileInfo(path_in_file_system);
+  if (!garrow::check(error, file_info_result, context)) {
+    return FALSE;
+  }
+
+  // Nothing is stored on the factory until both lookups have succeeded.
+  factory_priv->file_system = garrow_file_system_new_raw(&arrow_file_system);
+  auto file_info = garrow_file_info_new_raw(*file_info_result);
+  factory_priv->files = g_list_prepend(factory_priv->files, file_info);
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_add_path:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @path: A path to be added.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Looks up @path in the factory's file system and adds the resulting
+ * file info to the factory's file list. Fails when no file system has
+ * been set yet.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_add_path(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *path,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][add-path]";
+  auto factory_priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (factory_priv->file_system == NULL) {
+    auto not_set = arrow::Status::Invalid("file system isn't set");
+    garrow::check(error, not_set, context);
+    return FALSE;
+  }
+
+  auto arrow_file_system = garrow_file_system_get_raw(factory_priv->file_system);
+  auto file_info_result = arrow_file_system->GetFileInfo(path);
+  if (!garrow::check(error, file_info_result, context)) {
+    return FALSE;
+  }
+
+  auto file_info = garrow_file_info_new_raw(*file_info_result);
+  factory_priv->files = g_list_prepend(factory_priv->files, file_info);
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_finish:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Builds a dataset from the file system, format, options and files
+ * collected so far. When exactly one file was added and it is a
+ * directory, that directory is scanned recursively; otherwise the added
+ * files are used in the order they were added. Fails when the file
+ * system or the format isn't set.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetFileSystemDataset on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetFileSystemDataset *
+gadataset_file_system_dataset_factory_finish(
+  GADatasetFileSystemDatasetFactory *factory,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][finish]";
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (!priv->file_system) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file system isn't set"),
+                  context);
+    return NULL;
+  }
+  // "format" is an object property and may have been left NULL at
+  // construction; report that instead of handing NULL to
+  // gadataset_file_format_get_raw().
+  if (!priv->format) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file format isn't set"),
+                  context);
+    return NULL;
+  }
+  auto arrow_file_system = garrow_file_system_get_raw(priv->file_system);
+  auto arrow_format = gadataset_file_format_get_raw(priv->format);
+  arrow::Result<std::shared_ptr<arrow::dataset::DatasetFactory>>
+    arrow_factory_result;
+  if (priv->files &&
+      !priv->files->next &&
+      garrow_file_info_is_dir(GARROW_FILE_INFO(priv->files->data))) {
+    // A single directory: let Arrow discover the files below it.
+    auto file = GARROW_FILE_INFO(priv->files->data);
+    arrow::fs::FileSelector arrow_selector;
+    arrow_selector.base_dir = garrow_file_info_get_raw(file)->path();
+    arrow_selector.recursive = true;
+    arrow_factory_result =
+      arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system,
+                                                     arrow_selector,
+                                                     arrow_format,
+                                                     priv->options);
+  } else {
+    std::vector<arrow::fs::FileInfo> arrow_files;
+    // Files are stored newest-first (g_list_prepend()), so walk the list
+    // from its tail to visit them in insertion order. This leaves the
+    // list itself untouched instead of reversing it twice.
+    for (auto node = g_list_last(priv->files); node; node = node->prev) {
+      auto file = GARROW_FILE_INFO(node->data);
+      arrow_files.push_back(*garrow_file_info_get_raw(file));
+    }
+    arrow_factory_result =
+      arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system,
+                                                     arrow_files,
+                                                     arrow_format,
+                                                     priv->options);
+  }
+  if (!garrow::check(error, arrow_factory_result, context)) {
+    return NULL;
+  }
+  auto arrow_dataset_result = (*arrow_factory_result)->Finish();
+  if (!garrow::check(error, arrow_dataset_result, context)) {
+    return NULL;
+  }
+  auto arrow_dataset = *arrow_dataset_result;
+  return GADATASET_FILE_SYSTEM_DATASET(
+    gadataset_dataset_new_raw(&arrow_dataset,
+                              "dataset", &arrow_dataset,
+                              "file-system", priv->file_system,
+                              "format", priv->format,
+                              "partitioning", priv->partitioning,
+                              NULL));
+}
+
+
+G_END_DECLS
+
+/* Returns a copy of the Arrow C++ dataset factory pointer stored in the
+ * private data of factory. */
+std::shared_ptr<arrow::dataset::DatasetFactory>
+gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory)
+{
+  return GADATASET_DATASET_FACTORY_GET_PRIVATE(factory)->factory;
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h
new file mode 100644
index 000000000..e2ee3ed98
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/dataset.h>
+
+G_BEGIN_DECLS
+
+/* GADatasetDatasetFactory: derivable GObject type declared with
+ * GObject as its parent. The class structure adds no members of its
+ * own. */
+#define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDatasetFactory,
+ gadataset_dataset_factory,
+ GADATASET,
+ DATASET_FACTORY,
+ GObject)
+struct _GADatasetDatasetFactoryClass
+{
+ GObjectClass parent_class;
+};
+
+/* Finishes the factory and returns the created dataset, or NULL with
+ * error set on failure. */
+GARROW_AVAILABLE_IN_5_0
+GADatasetDataset *
+gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory,
+ GError **error);
+
+
+/* GADatasetFileSystemDatasetFactory: derivable GObject type declared
+ * with GADatasetDatasetFactory as its parent. The class structure adds
+ * no members of its own. */
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \
+ (gadataset_file_system_dataset_factory_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory,
+ gadataset_file_system_dataset_factory,
+ GADATASET,
+ FILE_SYSTEM_DATASET_FACTORY,
+ GADatasetDatasetFactory)
+struct _GADatasetFileSystemDatasetFactoryClass
+{
+ GADatasetDatasetFactoryClass parent_class;
+};
+
+/* Creates a file system dataset factory for the given file format. */
+GARROW_AVAILABLE_IN_5_0
+GADatasetFileSystemDatasetFactory *
+gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format);
+/* Sets the file system of the factory. Returns FALSE with error set
+ * when a file system is already set. */
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_set_file_system(
+ GADatasetFileSystemDatasetFactory *factory,
+ GArrowFileSystem *file_system,
+ GError **error);
+/* Sets the file system of the factory from a URI. Returns FALSE with
+ * error set on failure.
+ *
+ * Carries the same availability marker as every other public
+ * declaration in this header; the function is documented as
+ * "Since: 5.0.0". */
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_set_file_system_uri(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *uri,
+  GError **error);
+
+/* Adds the file at path (looked up in the factory's file system) to the
+ * factory. Returns FALSE with error set on failure. */
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_path(
+ GADatasetFileSystemDatasetFactory *factory,
+ const gchar *path,
+ GError **error);
+/*
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_file(
+ GADatasetFileSystemDatasetFactory *factory,
+ GArrowFileInfo *file,
+ GError **error);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_selector(
+ GADatasetFileSystemDatasetFactory *factory,
+ GArrowFileSelector *selector,
+ GError **error);
+*/
+
+/* Finishes the factory and returns the created file system dataset, or
+ * NULL with error set on failure. */
+GARROW_AVAILABLE_IN_5_0
+GADatasetFileSystemDataset *
+gadataset_file_system_dataset_factory_finish(
+ GADatasetFileSystemDatasetFactory *factory,
+ GError **error);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp
new file mode 100644
index 000000000..114db35bc
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/dataset-factory.h>
+
+/* C++-only accessor: returns the Arrow C++ dataset factory wrapped by
+ * the given GADatasetDatasetFactory. */
+std::shared_ptr<arrow::dataset::DatasetFactory>
+gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory);
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp b/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp
new file mode 100644
index 000000000..8613bedad
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp
@@ -0,0 +1,736 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/file-system.hpp>
+#include <arrow-glib/table.hpp>
+
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/file-format.hpp>
+#include <arrow-dataset-glib/partitioning.hpp>
+#include <arrow-dataset-glib/scanner.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: dataset
+ * @section_id: dataset
+ * @title: Dataset related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetDataset is a base class for datasets.
+ *
+ * #GADatasetFileSystemDataset is a class for file system dataset.
+ *
+ * #GADatasetFileSystemDatasetWriteOptions is a class for options to
+ * write a dataset to file system dataset.
+ *
+ * Since: 5.0.0
+ */
+
+/* Instance-private data of GADatasetDataset. The shared pointer is
+ * constructed in gadataset_dataset_init() and destroyed in
+ * gadataset_dataset_finalize(). */
+typedef struct GADatasetDatasetPrivate_ {
+ std::shared_ptr<arrow::dataset::Dataset> dataset;
+} GADatasetDatasetPrivate;
+
+/* Property IDs of GADatasetDataset. */
+enum {
+ PROP_DATASET = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDataset,
+ gadataset_dataset,
+ G_TYPE_OBJECT)
+
+/* Returns the GADatasetDatasetPrivate that belongs to obj. */
+#define GADATASET_DATASET_GET_PRIVATE(obj) \
+ static_cast<GADatasetDatasetPrivate *>( \
+ gadataset_dataset_get_instance_private( \
+ GADATASET_DATASET(obj)))
+
+/* GObject finalize handler: destroys the C++ shared pointer that
+ * gadataset_dataset_init() constructed in place, then chains up. */
+static void
+gadataset_dataset_finalize(GObject *object)
+{
+  auto *dataset_priv = GADATASET_DATASET_GET_PRIVATE(object);
+  // Placement-constructed member: the destructor has to be run by hand.
+  dataset_priv->dataset.~shared_ptr();
+
+  auto *parent_class = G_OBJECT_CLASS(gadataset_dataset_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject set_property handler.
+ *
+ * PROP_DATASET carries a pointer to a
+ * std::shared_ptr<arrow::dataset::Dataset>; the pointed-to shared
+ * pointer is copied into the instance-private data. */
+static void
+gadataset_dataset_set_property(GObject *object,
+                               guint prop_id,
+                               const GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GADATASET_DATASET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATASET:
+    {
+      auto arrow_dataset =
+        static_cast<std::shared_ptr<arrow::dataset::Dataset> *>(
+          g_value_get_pointer(value));
+      // "dataset" is construct-only, so GObject also sets it (to the
+      // default NULL pointer) when the caller didn't supply it.
+      // Don't dereference NULL in that case; keep the empty
+      // shared pointer created in init.
+      if (arrow_dataset) {
+        priv->dataset = *arrow_dataset;
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: default-constructs the private shared pointer
+ * in place inside the instance-private storage. */
+static void
+gadataset_dataset_init(GADatasetDataset *object)
+{
+  using ArrowDatasetPtr = std::shared_ptr<arrow::dataset::Dataset>;
+
+  auto *dataset_priv = GADATASET_DATASET_GET_PRIVATE(object);
+  new(&(dataset_priv->dataset)) ArrowDatasetPtr;
+}
+
+/* Class initializer: installs the finalize/set_property handlers and
+ * registers the write-only, construct-only "dataset" pointer property. */
+static void
+gadataset_dataset_class_init(GADatasetDatasetClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_dataset_finalize;
+  gobject_class->set_property = gadataset_dataset_set_property;
+
+  GParamSpec *spec;
+  // The blurb names the real C++ type (std::shared_ptr, not std::shared).
+  spec = g_param_spec_pointer("dataset",
+                              "Dataset",
+                              "The raw "
+                              "std::shared_ptr<arrow::dataset::Dataset> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DATASET, spec);
+}
+
+/**
+ * gadataset_dataset_begin_scan:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Starts a scan of @dataset by creating a scanner builder for it.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetScannerBuilder on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_dataset_begin_scan(GADatasetDataset *dataset,
+                             GError **error)
+{
+  auto *scanner_builder = gadataset_scanner_builder_new(dataset, error);
+  return scanner_builder;
+}
+
+/**
+ * gadataset_dataset_to_table:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Scans the whole @dataset with a default scanner and loads the
+ * result into a table.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A loaded #GArrowTable on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowTable *
+gadataset_dataset_to_table(GADatasetDataset *dataset,
+                           GError **error)
+{
+  auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+  if (!garrow::check(error,
+                     arrow_scanner_builder_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  auto arrow_scanner_builder = *arrow_scanner_builder_result;
+  auto arrow_scanner_result = arrow_scanner_builder->Finish();
+  if (!garrow::check(error,
+                     arrow_scanner_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  auto arrow_scanner = *arrow_scanner_result;
+  auto arrow_table_result = arrow_scanner->ToTable();
+  // Check the ToTable() result itself. Re-checking the scanner result
+  // here would always pass and a failed load would be dereferenced
+  // below instead of being reported through @error.
+  if (!garrow::check(error,
+                     arrow_table_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*arrow_table_result));
+}
+
+/**
+ * gadataset_dataset_get_type_name:
+ * @dataset: A #GADatasetDataset.
+ *
+ * Returns: The type name of @dataset, as a newly allocated string.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gadataset_dataset_get_type_name(GADatasetDataset *dataset)
+{
+  const auto &name = gadataset_dataset_get_raw(dataset)->type_name();
+  // Copy exactly size() bytes into a GLib-owned, NUL-terminated buffer.
+  auto *copied_name = g_strndup(name.data(), name.size());
+  return copied_name;
+}
+
+
+/* Instance-private data of GADatasetFileSystemDatasetWriteOptions.
+ *
+ * options is the raw Arrow C++ options object; it is constructed in
+ * place by the instance initializer and destroyed in finalize.
+ * The three GObject pointers are the GLib wrappers whose raw
+ * counterparts are stored in options; they are released in dispose. */
+typedef struct GADatasetFileSystemDatasetWriteOptionsPrivate_ {
+ arrow::dataset::FileSystemDatasetWriteOptions options;
+ GADatasetFileWriteOptions *file_write_options;
+ GArrowFileSystem *file_system;
+ GADatasetPartitioning *partitioning;
+} GADatasetFileSystemDatasetWriteOptionsPrivate;
+
+/* Property IDs of GADatasetFileSystemDatasetWriteOptions. */
+enum {
+ PROP_FILE_WRITE_OPTIONS = 1,
+ PROP_FILE_SYSTEM,
+ PROP_BASE_DIR,
+ PROP_PARTITIONING,
+ PROP_MAX_PARTITIONS,
+ PROP_BASE_NAME_TEMPLATE,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetWriteOptions,
+ gadataset_file_system_dataset_write_options,
+ G_TYPE_OBJECT)
+
+/* Returns the GADatasetFileSystemDatasetWriteOptionsPrivate that
+ * belongs to obj. */
+#define GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileSystemDatasetWriteOptionsPrivate *>( \
+ gadataset_file_system_dataset_write_options_get_instance_private( \
+ GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS(obj)))
+
+/* GObject finalize handler: runs the destructor of the in-place
+ * constructed Arrow options object, then chains up. */
+static void
+gadataset_file_system_dataset_write_options_finalize(GObject *object)
+{
+  using ArrowWriteOptions = arrow::dataset::FileSystemDatasetWriteOptions;
+
+  auto *options_priv =
+    GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object);
+  // Placement-constructed member: the destructor has to be run by hand.
+  options_priv->options.~ArrowWriteOptions();
+
+  auto *parent_class =
+    G_OBJECT_CLASS(gadataset_file_system_dataset_write_options_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject dispose handler: drops the references held on the GLib
+ * wrapper objects and resets the pointers so that a repeated dispose
+ * is harmless, then chains up. */
+static void
+gadataset_file_system_dataset_write_options_dispose(GObject *object)
+{
+  auto *options_priv =
+    GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object);
+
+  // g_clear_object() unrefs a non-NULL pointer and sets it to NULL.
+  g_clear_object(&(options_priv->file_write_options));
+  g_clear_object(&(options_priv->file_system));
+  g_clear_object(&(options_priv->partitioning));
+
+  auto *parent_class =
+    G_OBJECT_CLASS(gadataset_file_system_dataset_write_options_parent_class);
+  parent_class->dispose(object);
+}
+
+/* GObject set_property handler.
+ *
+ * Object-valued properties keep two things in sync: the GLib wrapper
+ * stored in the private struct (one reference held) and the raw Arrow
+ * object stored in priv->options. String and integer properties are
+ * written straight into priv->options. */
+static void
+gadataset_file_system_dataset_write_options_set_property(GObject *object,
+                                                         guint prop_id,
+                                                         const GValue *value,
+                                                         GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FILE_WRITE_OPTIONS:
+    {
+      auto file_write_options = g_value_get_object(value);
+      if (file_write_options == priv->file_write_options) {
+        break;
+      }
+      auto old_file_write_options = priv->file_write_options;
+      if (file_write_options) {
+        g_object_ref(file_write_options);
+        priv->file_write_options =
+          GADATASET_FILE_WRITE_OPTIONS(file_write_options);
+        priv->options.file_write_options =
+          gadataset_file_write_options_get_raw(priv->file_write_options);
+      } else {
+        // Forget the old wrapper too; otherwise the pointer would
+        // dangle after the unref below and be unreffed again in
+        // dispose.
+        priv->file_write_options = NULL;
+        priv->options.file_write_options = nullptr;
+      }
+      if (old_file_write_options) {
+        g_object_unref(old_file_write_options);
+      }
+    }
+    break;
+  case PROP_FILE_SYSTEM:
+    {
+      auto file_system = g_value_get_object(value);
+      if (file_system == priv->file_system) {
+        break;
+      }
+      auto old_file_system = priv->file_system;
+      if (file_system) {
+        g_object_ref(file_system);
+        priv->file_system = GARROW_FILE_SYSTEM(file_system);
+        priv->options.filesystem = garrow_file_system_get_raw(priv->file_system);
+      } else {
+        // See PROP_FILE_WRITE_OPTIONS: don't keep a dangling wrapper.
+        priv->file_system = NULL;
+        priv->options.filesystem = nullptr;
+      }
+      if (old_file_system) {
+        g_object_unref(old_file_system);
+      }
+    }
+    break;
+  case PROP_BASE_DIR:
+    {
+      // Constructing std::string from a NULL pointer is undefined
+      // behavior; treat an unset string as empty.
+      auto base_dir = g_value_get_string(value);
+      priv->options.base_dir = base_dir ? base_dir : "";
+    }
+    break;
+  case PROP_PARTITIONING:
+    {
+      auto partitioning = g_value_get_object(value);
+      if (partitioning == priv->partitioning) {
+        break;
+      }
+      auto old_partitioning = priv->partitioning;
+      if (partitioning) {
+        g_object_ref(partitioning);
+        priv->partitioning = GADATASET_PARTITIONING(partitioning);
+        priv->options.partitioning =
+          gadataset_partitioning_get_raw(priv->partitioning);
+      } else {
+        // See PROP_FILE_WRITE_OPTIONS: don't keep a dangling wrapper.
+        // The raw options fall back to the default partitioning, the
+        // same value the instance initializer uses.
+        priv->partitioning = NULL;
+        priv->options.partitioning = arrow::dataset::Partitioning::Default();
+      }
+      if (old_partitioning) {
+        g_object_unref(old_partitioning);
+      }
+    }
+    break;
+  case PROP_MAX_PARTITIONS:
+    priv->options.max_partitions = g_value_get_uint(value);
+    break;
+  case PROP_BASE_NAME_TEMPLATE:
+    {
+      // Same NULL guard as PROP_BASE_DIR.
+      auto basename_template = g_value_get_string(value);
+      priv->options.basename_template =
+        basename_template ? basename_template : "";
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject get_property handler: object properties are served from the
+ * stored GLib wrappers, scalar/string properties from the raw Arrow
+ * options object. */
+static void
+gadataset_file_system_dataset_write_options_get_property(GObject *object,
+                                                         guint prop_id,
+                                                         GValue *value,
+                                                         GParamSpec *pspec)
+{
+  auto *options_priv =
+    GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object);
+  const auto &arrow_options = options_priv->options;
+
+  if (prop_id == PROP_FILE_WRITE_OPTIONS) {
+    g_value_set_object(value, options_priv->file_write_options);
+  } else if (prop_id == PROP_FILE_SYSTEM) {
+    g_value_set_object(value, options_priv->file_system);
+  } else if (prop_id == PROP_BASE_DIR) {
+    g_value_set_string(value, arrow_options.base_dir.c_str());
+  } else if (prop_id == PROP_PARTITIONING) {
+    g_value_set_object(value, options_priv->partitioning);
+  } else if (prop_id == PROP_MAX_PARTITIONS) {
+    g_value_set_uint(value, arrow_options.max_partitions);
+  } else if (prop_id == PROP_BASE_NAME_TEMPLATE) {
+    g_value_set_string(value, arrow_options.basename_template.c_str());
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+  }
+}
+
+/* Instance initializer: constructs the Arrow options object in place
+ * and selects the default partitioning. */
+static void
+gadataset_file_system_dataset_write_options_init(
+  GADatasetFileSystemDatasetWriteOptions *object)
+{
+  using ArrowWriteOptions = arrow::dataset::FileSystemDatasetWriteOptions;
+
+  auto *options_priv =
+    GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object);
+  auto *arrow_options = new(&(options_priv->options)) ArrowWriteOptions;
+  arrow_options->partitioning = arrow::dataset::Partitioning::Default();
+}
+
+/* Class initializer: installs the GObject handlers and registers the
+ * six read/write properties. A default-constructed Arrow options
+ * object supplies the default value of "max-partitions". */
+static void
+gadataset_file_system_dataset_write_options_class_init(
+ GADatasetFileSystemDatasetWriteOptionsClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+ gobject_class->finalize =
+ gadataset_file_system_dataset_write_options_finalize;
+ gobject_class->dispose =
+ gadataset_file_system_dataset_write_options_dispose;
+ gobject_class->set_property =
+ gadataset_file_system_dataset_write_options_set_property;
+ gobject_class->get_property =
+ gadataset_file_system_dataset_write_options_get_property;
+
+ arrow::dataset::FileSystemDatasetWriteOptions default_options;
+ GParamSpec *spec;
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:file-write-options:
+ *
+ * Options for individual fragment writing.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_object("file-write-options",
+ "File write options",
+ "Options for individual fragment writing",
+ GADATASET_TYPE_FILE_WRITE_OPTIONS,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_FILE_WRITE_OPTIONS, spec);
+
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:file-system:
+ *
+ * #GArrowFileSystem into which a dataset will be written.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_object("file-system",
+ "File system",
+ "GArrowFileSystem into which "
+ "a dataset will be written",
+ GARROW_TYPE_FILE_SYSTEM,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec);
+
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:base-dir:
+ *
+ * Root directory into which the dataset will be written.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_string("base-dir",
+ "Base directory",
+ "Root directory into which "
+ "the dataset will be written",
+ NULL,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_BASE_DIR, spec);
+
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:partitioning:
+ *
+ * #GADatasetPartitioning used to generate fragment paths.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_object("partitioning",
+ "Partitioning",
+ "GADatasetPartitioning used to "
+ "generate fragment paths",
+ GADATASET_TYPE_PARTITIONING,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec);
+
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:max-partitions:
+ *
+ * Maximum number of partitions any batch may be written into.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_uint("max-partitions",
+ "Max partitions",
+ "Maximum number of partitions "
+ "any batch may be written into",
+ 0,
+ G_MAXINT,
+ default_options.max_partitions,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_MAX_PARTITIONS, spec);
+
+ /**
+ * GADatasetFileSystemDatasetWriteOptions:base-name-template:
+ *
+ * Template string used to generate fragment base names. {i} will be
+ * replaced by an auto incremented integer.
+ *
+ * Since: 6.0.0
+ */
+ spec = g_param_spec_string("base-name-template",
+ "Base name template",
+ "Template string used to generate fragment "
+ "base names. {i} will be replaced by "
+ "an auto incremented integer",
+ NULL,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_BASE_NAME_TEMPLATE, spec);
+}
+
+/**
+ * gadataset_file_system_dataset_write_options_new:
+ *
+ * Creates write options with no properties set explicitly.
+ *
+ * Returns: The newly created #GADatasetFileSystemDatasetWriteOptions.
+ *
+ * Since: 6.0.0
+ */
+GADatasetFileSystemDatasetWriteOptions *
+gadataset_file_system_dataset_write_options_new(void)
+{
+  auto object =
+    g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS, NULL);
+  return GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS(object);
+}
+
+
+/* Instance-private data of GADatasetFileSystemDataset: the GLib
+ * wrappers handed over through the construct-only properties. Each
+ * pointer holds one reference (taken with g_value_dup_object() in
+ * set_property). */
+typedef struct GADatasetFileSystemDatasetPrivate_ {
+ GADatasetFileFormat *format;
+ GArrowFileSystem *file_system;
+ GADatasetPartitioning *partitioning;
+} GADatasetFileSystemDatasetPrivate;
+
+/* Property IDs of GADatasetFileSystemDataset. */
+enum {
+ PROP_FILE_SYSTEM_DATASET_FORMAT = 1,
+ PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM,
+ PROP_FILE_SYSTEM_DATASET_PARTITIONING,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDataset,
+ gadataset_file_system_dataset,
+ GADATASET_TYPE_DATASET)
+
+/* Returns the GADatasetFileSystemDatasetPrivate that belongs to obj. */
+#define GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileSystemDatasetPrivate *>( \
+ gadataset_file_system_dataset_get_instance_private( \
+ GADATASET_FILE_SYSTEM_DATASET(obj)))
+
+/* GObject dispose handler: releases every reference taken in
+ * set_property (format, file system and partitioning) and resets the
+ * pointers so that a repeated dispose is harmless, then chains up. */
+static void
+gadataset_file_system_dataset_dispose(GObject *object)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  if (priv->format) {
+    g_object_unref(priv->format);
+    priv->format = NULL;
+  }
+
+  if (priv->file_system) {
+    g_object_unref(priv->file_system);
+    priv->file_system = NULL;
+  }
+
+  // The partitioning is dup'ed in set_property as well; without this
+  // unref its reference would leak.
+  if (priv->partitioning) {
+    g_object_unref(priv->partitioning);
+    priv->partitioning = NULL;
+  }
+
+  G_OBJECT_CLASS(gadataset_file_system_dataset_parent_class)->dispose(object);
+}
+
+/* GObject set_property handler: stores a new reference to the object
+ * carried by @value in the matching private member. */
+static void
+gadataset_file_system_dataset_set_property(GObject *object,
+                                           guint prop_id,
+                                           const GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto *dataset_priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  if (prop_id == PROP_FILE_SYSTEM_DATASET_FORMAT) {
+    auto format = g_value_dup_object(value);
+    dataset_priv->format = GADATASET_FILE_FORMAT(format);
+  } else if (prop_id == PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM) {
+    auto file_system = g_value_dup_object(value);
+    dataset_priv->file_system = GARROW_FILE_SYSTEM(file_system);
+  } else if (prop_id == PROP_FILE_SYSTEM_DATASET_PARTITIONING) {
+    auto partitioning = g_value_dup_object(value);
+    dataset_priv->partitioning = GADATASET_PARTITIONING(partitioning);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+  }
+}
+
+/* GObject get_property handler: exposes the stored GLib wrapper that
+ * corresponds to @prop_id. */
+static void
+gadataset_file_system_dataset_get_property(GObject *object,
+                                           guint prop_id,
+                                           GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto *dataset_priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  if (prop_id == PROP_FILE_SYSTEM_DATASET_FORMAT) {
+    g_value_set_object(value, dataset_priv->format);
+    return;
+  }
+  if (prop_id == PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM) {
+    g_value_set_object(value, dataset_priv->file_system);
+    return;
+  }
+  if (prop_id == PROP_FILE_SYSTEM_DATASET_PARTITIONING) {
+    g_value_set_object(value, dataset_priv->partitioning);
+    return;
+  }
+  G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+}
+
+/* Instance initializer. No per-instance setup is performed here; the
+ * format, file_system and partitioning members are assigned in
+ * gadataset_file_system_dataset_set_property(). */
+static void
+gadataset_file_system_dataset_init(GADatasetFileSystemDataset *object)
+{
+}
+
+/* Class initializer: installs the GObject handlers and registers the
+ * "format", "file-system" and "partitioning" properties. */
+static void
+gadataset_file_system_dataset_class_init(GADatasetFileSystemDatasetClass *klass)
+{
+  // All three properties are readable and can only be set at construction.
+  const auto construct_only_flags =
+    static_cast<GParamFlags>(G_PARAM_READWRITE |
+                             G_PARAM_CONSTRUCT_ONLY);
+
+  auto object_class = G_OBJECT_CLASS(klass);
+  object_class->dispose = gadataset_file_system_dataset_dispose;
+  object_class->set_property = gadataset_file_system_dataset_set_property;
+  object_class->get_property = gadataset_file_system_dataset_get_property;
+
+  /**
+   * GADatasetFileSystemDataset:format:
+   *
+   * Format of the dataset.
+   *
+   * Since: 5.0.0
+   */
+  auto format_spec = g_param_spec_object("format",
+                                         "Format",
+                                         "Format of the dataset",
+                                         GADATASET_TYPE_FILE_FORMAT,
+                                         construct_only_flags);
+  g_object_class_install_property(object_class,
+                                  PROP_FILE_SYSTEM_DATASET_FORMAT,
+                                  format_spec);
+
+  /**
+   * GADatasetFileSystemDataset:file-system:
+   *
+   * File system of the dataset.
+   *
+   * Since: 5.0.0
+   */
+  auto file_system_spec = g_param_spec_object("file-system",
+                                              "File system",
+                                              "File system of the dataset",
+                                              GARROW_TYPE_FILE_SYSTEM,
+                                              construct_only_flags);
+  g_object_class_install_property(object_class,
+                                  PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM,
+                                  file_system_spec);
+
+  /**
+   * GADatasetFileSystemDataset:partitioning:
+   *
+   * Partitioning of the dataset.
+   *
+   * Since: 6.0.0
+   */
+  auto partitioning_spec = g_param_spec_object("partitioning",
+                                               "Partitioning",
+                                               "Partitioning of the dataset",
+                                               GADATASET_TYPE_PARTITIONING,
+                                               construct_only_flags);
+  g_object_class_install_property(object_class,
+                                  PROP_FILE_SYSTEM_DATASET_PARTITIONING,
+                                  partitioning_spec);
+}
+
+/**
+ * gadataset_file_system_dataset_write_scanner:
+ * @scanner: A #GADatasetScanner that produces data to be written.
+ * @options: A #GADatasetFileSystemDatasetWriteOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Writes the data produced by @scanner as configured by @options.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_file_system_dataset_write_scanner(
+  GADatasetScanner *scanner,
+  GADatasetFileSystemDatasetWriteOptions *options,
+  GError **error)
+{
+  auto raw_scanner = gadataset_scanner_get_raw(scanner);
+  auto *raw_options =
+    gadataset_file_system_dataset_write_options_get_raw(options);
+  auto write_status =
+    arrow::dataset::FileSystemDataset::Write(*raw_options, raw_scanner);
+  const gboolean succeeded =
+    garrow::check(error,
+                  write_status,
+                  "[file-system-dataset][write-scanner]");
+  return succeeded;
+}
+
+
+G_END_DECLS
+
+/* Wraps @arrow_dataset in a GADatasetDataset, passing it as the only
+ * property ("dataset") to the varargs overload. */
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset)
+{
+  auto *wrapped_dataset = gadataset_dataset_new_raw(arrow_dataset,
+                                                    "dataset", arrow_dataset,
+                                                    NULL);
+  return wrapped_dataset;
+}
+
+/* Varargs variant: collects the NULL-terminated property name/value
+ * list that starts at @first_property_name and forwards it to
+ * gadataset_dataset_new_raw_valist(). */
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  ...)
+{
+  va_list properties;
+  va_start(properties, first_property_name);
+  auto *wrapped_dataset =
+    gadataset_dataset_new_raw_valist(arrow_dataset,
+                                     first_property_name,
+                                     properties);
+  va_end(properties);
+  return wrapped_dataset;
+}
+
+/* Creates the GLib wrapper for @arrow_dataset. The GType is chosen
+ * from the Arrow type name: "filesystem" maps to
+ * GADatasetFileSystemDataset, everything else to the base type. The
+ * properties in @args are handed to g_object_new_valist(). */
+GADatasetDataset *
+gadataset_dataset_new_raw_valist(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  va_list args)
+{
+  const bool is_file_system =
+    ((*arrow_dataset)->type_name() == "filesystem");
+  GType wrapper_type = GADATASET_TYPE_DATASET;
+  if (is_file_system) {
+    wrapper_type = GADATASET_TYPE_FILE_SYSTEM_DATASET;
+  }
+  auto object = g_object_new_valist(wrapper_type,
+                                    first_property_name,
+                                    args);
+  return GADATASET_DATASET(object);
+}
+
+/* Returns a copy of the shared pointer to the wrapped Arrow dataset. */
+std::shared_ptr<arrow::dataset::Dataset>
+gadataset_dataset_get_raw(GADatasetDataset *dataset)
+{
+  return GADATASET_DATASET_GET_PRIVATE(dataset)->dataset;
+}
+
+/* Returns a pointer to the Arrow options object embedded in the
+ * private data of @options; no copy is made. */
+arrow::dataset::FileSystemDatasetWriteOptions *
+gadataset_file_system_dataset_write_options_get_raw(
+  GADatasetFileSystemDatasetWriteOptions *options)
+{
+  auto *options_priv =
+    GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(options);
+  auto *arrow_options = &(options_priv->options);
+  return arrow_options;
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.h b/src/arrow/c_glib/arrow-dataset-glib/dataset.h
new file mode 100644
index 000000000..86d077caa
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.h
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/file-format.h>
+
+G_BEGIN_DECLS
+
+typedef struct _GADatasetScannerBuilder GADatasetScannerBuilder;
+typedef struct _GADatasetScanner GADatasetScanner;
+
+/* GADatasetDataset: derivable GObject type; its class struct adds no
+ * members beyond the parent class. */
+#define GADATASET_TYPE_DATASET (gadataset_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDataset,
+ gadataset_dataset,
+ GADATASET,
+ DATASET,
+ GObject)
+struct _GADatasetDatasetClass
+{
+ GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetScannerBuilder *
+gadataset_dataset_begin_scan(GADatasetDataset *dataset,
+ GError **error);
+GARROW_AVAILABLE_IN_5_0
+GArrowTable *
+gadataset_dataset_to_table(GADatasetDataset *dataset,
+ GError **error);
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gadataset_dataset_get_type_name(GADatasetDataset *dataset);
+
+
+/* GADatasetFileSystemDatasetWriteOptions: derivable GObject type; its
+ * class struct adds no members beyond the parent class. */
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \
+ (gadataset_file_system_dataset_write_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetWriteOptions,
+ gadataset_file_system_dataset_write_options,
+ GADATASET,
+ FILE_SYSTEM_DATASET_WRITE_OPTIONS,
+ GObject)
+struct _GADatasetFileSystemDatasetWriteOptionsClass
+{
+ GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileSystemDatasetWriteOptions *
+gadataset_file_system_dataset_write_options_new(void);
+
+
+/* GADatasetFileSystemDataset: derivable type whose parent is
+ * GADatasetDataset; its class struct adds no members beyond the
+ * parent class. */
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET \
+ (gadataset_file_system_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset,
+ gadataset_file_system_dataset,
+ GADATASET,
+ FILE_SYSTEM_DATASET,
+ GADatasetDataset)
+struct _GADatasetFileSystemDatasetClass
+{
+ GADatasetDatasetClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_system_dataset_write_scanner(
+ GADatasetScanner *scanner,
+ GADatasetFileSystemDatasetWriteOptions *options,
+ GError **error);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp b/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp
new file mode 100644
index 000000000..1dab391e8
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/dataset.h>
+
+
+GADatasetDataset *
+gadataset_dataset_new_raw(
+ std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset);
+GADatasetDataset *
+gadataset_dataset_new_raw(
+ std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+ const gchar *first_property_name,
+ ...);
+GADatasetDataset *
+gadataset_dataset_new_raw_valist(
+ std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+ const gchar *first_property_name,
+ va_list arg);
+std::shared_ptr<arrow::dataset::Dataset>
+gadataset_dataset_get_raw(GADatasetDataset *dataset);
+
+
+arrow::dataset::FileSystemDatasetWriteOptions *
+gadataset_file_system_dataset_write_options_get_raw(
+ GADatasetFileSystemDatasetWriteOptions *options);
diff --git a/src/arrow/c_glib/arrow-dataset-glib/enums.c.template b/src/arrow/c_glib/arrow-dataset-glib/enums.c.template
new file mode 100644
index 000000000..8921ab062
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/enums.c.template
@@ -0,0 +1,52 @@
+/*** BEGIN file-header ***/
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-dataset-glib/arrow-dataset-glib.h>
+/*** END file-header ***/
+
+/*** BEGIN file-production ***/
+
+/* enumerations from "@filename@" */
+/*** END file-production ***/
+
+/*** BEGIN value-header ***/
+GType
+@enum_name@_get_type(void)
+{
+ static GType etype = 0;
+ if (G_UNLIKELY(etype == 0)) {
+ static const G@Type@Value values[] = {
+/*** END value-header ***/
+
+/*** BEGIN value-production ***/
+ {@VALUENAME@, "@VALUENAME@", "@valuenick@"},
+/*** END value-production ***/
+
+/*** BEGIN value-tail ***/
+ {0, NULL, NULL}
+ };
+ etype = g_@type@_register_static(g_intern_static_string("@EnumName@"), values);
+ }
+ return etype;
+}
+/*** END value-tail ***/
+
+/*** BEGIN file-tail ***/
+/*** END file-tail ***/
diff --git a/src/arrow/c_glib/arrow-dataset-glib/enums.h.template b/src/arrow/c_glib/arrow-dataset-glib/enums.h.template
new file mode 100644
index 000000000..d6a0a455f
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/enums.h.template
@@ -0,0 +1,41 @@
+/*** BEGIN file-header ***/
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/partitioning.h>
+
+G_BEGIN_DECLS
+/*** END file-header ***/
+
+/*** BEGIN file-production ***/
+
+/* enumerations from "@filename@" */
+/*** END file-production ***/
+
+/*** BEGIN value-header ***/
+GType @enum_name@_get_type(void) G_GNUC_CONST;
+#define @ENUMPREFIX@_TYPE_@ENUMSHORT@ (@enum_name@_get_type())
+/*** END value-header ***/
+
+/*** BEGIN file-tail ***/
+
+G_END_DECLS
+/*** END file-tail ***/
diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp b/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp
new file mode 100644
index 000000000..c0c92d966
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp
@@ -0,0 +1,574 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/file-system.hpp>
+#include <arrow-glib/output-stream.hpp>
+#include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/reader.hpp>
+#include <arrow-glib/schema.hpp>
+
+#include <arrow-dataset-glib/file-format.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: file-format
+ * @section_id: file-format
+ * @title: File format classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetFileWriteOptions is a class for options to write a file
+ * of this format.
+ *
+ * #GADatasetFileWriter is a class for writing a file of this format.
+ *
+ * #GADatasetFileFormat is a base class for file format classes.
+ *
+ * #GADatasetCSVFileFormat is a class for CSV file format.
+ *
+ * #GADatasetIPCFileFormat is a class for IPC file format.
+ *
+ * #GADatasetParquetFileFormat is a class for Parquet file format.
+ *
+ * Since: 3.0.0
+ */
+
+typedef struct GADatasetFileWriteOptionsPrivate_ {
+ std::shared_ptr<arrow::dataset::FileWriteOptions> options; /* wrapped Arrow C++ object */
+} GADatasetFileWriteOptionsPrivate;
+
+enum {
+ PROP_OPTIONS = 1, /* ID of the "options" property installed in class_init */
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileWriteOptions,
+ gadataset_file_write_options,
+ G_TYPE_OBJECT)
+/* Returns the GADatasetFileWriteOptionsPrivate of @obj. */
+#define GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileWriteOptionsPrivate *>( \
+ gadataset_file_write_options_get_instance_private( \
+ GADATASET_FILE_WRITE_OPTIONS(obj)))
+
+static void
+gadataset_file_write_options_finalize(GObject *object)
+{
+  /* Run the C++ destructor by hand before chaining up to the parent. */
+  GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object)->options.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_file_write_options_parent_class)->finalize(object);
+}
+
+static void
+gadataset_file_write_options_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  /* Store a copy of the shared_ptr that the "options" value points at. */
+  switch (prop_id) {
+  case PROP_OPTIONS: /* NULL (the default when unset) leaves it empty */
+    if (auto raw = g_value_get_pointer(value)) {
+      GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object)->options =
+        *static_cast<std::shared_ptr<arrow::dataset::FileWriteOptions> *>(raw);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_write_options_init(GADatasetFileWriteOptions *object)
+{
+  auto storage = &(GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object)->options);
+  new(storage) std::shared_ptr<arrow::dataset::FileWriteOptions>; /* in place */
+}
+
+static void
+gadataset_file_write_options_class_init(GADatasetFileWriteOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_file_write_options_finalize;
+  gobject_class->set_property = gadataset_file_write_options_set_property;
+
+  /* Write-only, construct-only pointer property: it is how
+   * gadataset_file_write_options_new_raw() hands over the C++ object. */
+  auto spec = g_param_spec_pointer("options",
+                                   "Options",
+                                   "The raw "
+                                   "std::shared_ptr<arrow::dataset::FileWriteOptions> *",
+                                   static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                            G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_OPTIONS, spec);
+}
+
+
+typedef struct GADatasetFileWriterPrivate_ {
+ std::shared_ptr<arrow::dataset::FileWriter> writer; /* wrapped Arrow C++ object */
+} GADatasetFileWriterPrivate;
+
+enum {
+ PROP_WRITER = 1, /* ID of the "writer" property installed in class_init */
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileWriter,
+ gadataset_file_writer,
+ G_TYPE_OBJECT)
+/* Returns the GADatasetFileWriterPrivate of @obj. */
+#define GADATASET_FILE_WRITER_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileWriterPrivate *>( \
+ gadataset_file_writer_get_instance_private( \
+ GADATASET_FILE_WRITER(obj)))
+
+static void
+gadataset_file_writer_finalize(GObject *object)
+{
+  /* Run the C++ destructor by hand before chaining up to the parent. */
+  GADATASET_FILE_WRITER_GET_PRIVATE(object)->writer.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_file_writer_parent_class)->finalize(object);
+}
+
+static void
+gadataset_file_writer_set_property(GObject *object,
+                                   guint prop_id,
+                                   const GValue *value,
+                                   GParamSpec *pspec)
+{
+  /* Store a copy of the shared_ptr that the "writer" value points at. */
+  switch (prop_id) {
+  case PROP_WRITER: /* NULL (the default when unset) leaves it empty */
+    if (auto raw = g_value_get_pointer(value)) {
+      GADATASET_FILE_WRITER_GET_PRIVATE(object)->writer =
+        *static_cast<std::shared_ptr<arrow::dataset::FileWriter> *>(raw);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_writer_init(GADatasetFileWriter *object)
+{
+  auto storage = &(GADATASET_FILE_WRITER_GET_PRIVATE(object)->writer);
+  new(storage) std::shared_ptr<arrow::dataset::FileWriter>; /* in place */
+}
+
+static void
+gadataset_file_writer_class_init(GADatasetFileWriterClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_file_writer_finalize;
+  gobject_class->set_property = gadataset_file_writer_set_property;
+
+  /* Write-only, construct-only pointer property: it is how
+   * gadataset_file_writer_new_raw() hands over the C++ object. */
+  auto spec = g_param_spec_pointer("writer",
+                                   "Writer",
+                                   "The raw "
+                                   "std::shared_ptr<arrow::dataset::FileWriter> *",
+                                   static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                            G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_WRITER, spec);
+}
+
+/**
+ * gadataset_file_writer_write_record_batch:
+ * @writer: A #GADatasetFileWriter.
+ * @record_batch: The #GArrowRecordBatch to write with @writer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE if the write succeeded, %FALSE if it failed.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer,
+                                         GArrowRecordBatch *record_batch,
+                                         GError **error)
+{
+  auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  auto written =
+    gadataset_file_writer_get_raw(writer)->Write(arrow_record_batch);
+  return garrow::check(error, written, "[file-writer][write-record-batch]");
+}
+
+/**
+ * gadataset_file_writer_write_record_batch_reader:
+ * @writer: A #GADatasetFileWriter.
+ * @reader: The #GArrowRecordBatchReader to write with @writer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE if the write succeeded, %FALSE if it failed.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer,
+                                                GArrowRecordBatchReader *reader,
+                                                GError **error)
+{
+  /* Hold the owning pointer locally; Write() only gets the raw pointer. */
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  auto written =
+    gadataset_file_writer_get_raw(writer)->Write(arrow_reader.get());
+  return garrow::check(error, written,
+                       "[file-writer][write-record-batch-reader]");
+}
+
+/**
+ * gadataset_file_writer_finish:
+ * @writer: A #GADatasetFileWriter.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE if finishing succeeded, %FALSE if it failed.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_file_writer_finish(GADatasetFileWriter *writer,
+                             GError **error)
+{
+  /* garrow::check() maps the outcome of Finish() to the gboolean result. */
+  auto finished = gadataset_file_writer_get_raw(writer)->Finish();
+  return garrow::check(error,
+                       finished,
+                       "[file-writer][finish]");
+}
+
+
+typedef struct GADatasetFileFormatPrivate_ {
+ std::shared_ptr<arrow::dataset::FileFormat> format; /* wrapped Arrow C++ object */
+} GADatasetFileFormatPrivate;
+
+enum {
+ PROP_FORMAT = 1, /* ID of the "format" property installed in class_init */
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileFormat,
+ gadataset_file_format,
+ G_TYPE_OBJECT)
+/* Returns the GADatasetFileFormatPrivate of @obj. */
+#define GADATASET_FILE_FORMAT_GET_PRIVATE(obj) \
+ static_cast<GADatasetFileFormatPrivate *>( \
+ gadataset_file_format_get_instance_private( \
+ GADATASET_FILE_FORMAT(obj)))
+
+static void
+gadataset_file_format_finalize(GObject *object)
+{
+  /* Run the C++ destructor by hand before chaining up to the parent. */
+  GADATASET_FILE_FORMAT_GET_PRIVATE(object)->format.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_file_format_parent_class)->finalize(object);
+}
+
+static void
+gadataset_file_format_set_property(GObject *object,
+                                   guint prop_id,
+                                   const GValue *value,
+                                   GParamSpec *pspec)
+{
+  /* Store a copy of the shared_ptr that the "format" value points at. */
+  switch (prop_id) {
+  case PROP_FORMAT: /* NULL (the default when unset) leaves it empty */
+    if (auto raw = g_value_get_pointer(value)) {
+      GADATASET_FILE_FORMAT_GET_PRIVATE(object)->format =
+        *static_cast<std::shared_ptr<arrow::dataset::FileFormat> *>(raw);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_format_init(GADatasetFileFormat *object)
+{
+  auto storage = &(GADATASET_FILE_FORMAT_GET_PRIVATE(object)->format);
+  new(storage) std::shared_ptr<arrow::dataset::FileFormat>; /* in place */
+}
+
+static void
+gadataset_file_format_class_init(GADatasetFileFormatClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_file_format_finalize;
+  gobject_class->set_property = gadataset_file_format_set_property;
+
+  /* Write-only, construct-only: set by gadataset_file_format_new_raw(). */
+  auto spec = g_param_spec_pointer("format",
+                                   "Format",
+                                   "The raw "
+                                   "std::shared_ptr<arrow::dataset::FileFormat> *",
+                                   static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                            G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_FORMAT, spec);
+}
+
+/**
+ * gadataset_file_format_get_type_name:
+ * @format: A #GADatasetFileFormat.
+ *
+ * Returns: The type name of @format as a newly allocated string.
+ *
+ * Release it with g_free() once it is no longer needed.
+ *
+ * Since: 3.0.0
+ */
+gchar *
+gadataset_file_format_get_type_name(GADatasetFileFormat *format)
+{
+  /* g_strndup() makes the copy that is handed to the caller. */
+  const auto name = gadataset_file_format_get_raw(format)->type_name();
+  return g_strndup(name.data(), name.size());
+}
+
+/**
+ * gadataset_file_format_get_default_write_options:
+ * @format: A #GADatasetFileFormat.
+ *
+ * Returns: (transfer full): A #GADatasetFileWriteOptions holding the defaults of @format.
+ *
+ * Since: 6.0.0
+ */
+GADatasetFileWriteOptions *
+gadataset_file_format_get_default_write_options(GADatasetFileFormat *format)
+{
+  auto arrow_default_options =
+    gadataset_file_format_get_raw(format)->DefaultWriteOptions();
+  return gadataset_file_write_options_new_raw(&arrow_default_options);
+}
+
+/**
+ * gadataset_file_format_open_writer:
+ * @format: A #GADatasetFileFormat.
+ * @destination: The #GArrowOutputStream to write to.
+ * @file_system: The #GArrowFileSystem that @destination belongs to.
+ * @path: The path of @destination in @file_system.
+ * @schema: The #GArrowSchema of the record batches that will be written.
+ * @options: The #GADatasetFileWriteOptions to write with.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): A new #GADatasetFileWriter for @format on
+ *   success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GADatasetFileWriter *
+gadataset_file_format_open_writer(GADatasetFileFormat *format,
+                                  GArrowOutputStream *destination,
+                                  GArrowFileSystem *file_system,
+                                  const gchar *path,
+                                  GArrowSchema *schema,
+                                  GADatasetFileWriteOptions *options,
+                                  GError **error)
+{
+  /* Unwrap every GLib object into its Arrow C++ counterpart first. */
+  auto arrow_destination = garrow_output_stream_get_raw(destination);
+  auto arrow_file_system = garrow_file_system_get_raw(file_system);
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  auto arrow_options = gadataset_file_write_options_get_raw(options);
+  auto arrow_writer_result =
+    gadataset_file_format_get_raw(format)->MakeWriter(
+      arrow_destination,
+      arrow_schema,
+      arrow_options,
+      {arrow_file_system, path});
+  if (!garrow::check(error, arrow_writer_result, "[file-format][open-writer]")) {
+    return NULL;
+  }
+  auto arrow_writer = *arrow_writer_result;
+  return gadataset_file_writer_new_raw(&arrow_writer);
+}
+
+/**
+ * gadataset_file_format_equal:
+ * @format: A #GADatasetFileFormat.
+ * @other_format: The #GADatasetFileFormat to compare @format with.
+ *
+ * Returns: %TRUE if both file formats have the same content, %FALSE otherwise.
+ *
+ * Since: 3.0.0
+ */
+gboolean
+gadataset_file_format_equal(GADatasetFileFormat *format,
+                            GADatasetFileFormat *other_format)
+{
+  auto arrow_other_format = gadataset_file_format_get_raw(other_format);
+  /* The comparison itself is delegated to the Arrow C++ Equals(). */
+  return gadataset_file_format_get_raw(format)->Equals(*arrow_other_format);
+}
+
+
+G_DEFINE_TYPE(GADatasetCSVFileFormat,
+ gadataset_csv_file_format,
+ GADATASET_TYPE_FILE_FORMAT)
+/* Instance initializer: the subclass adds no state, so the body is empty. */
+static void
+gadataset_csv_file_format_init(GADatasetCSVFileFormat *object)
+{
+}
+/* Class initializer: nothing is overridden, so the body is empty. */
+static void
+gadataset_csv_file_format_class_init(GADatasetCSVFileFormatClass *klass)
+{
+}
+
+/**
+ * gadataset_csv_file_format_new:
+ *
+ * Returns: A newly created #GADatasetCSVFileFormat.
+ *
+ * Since: 3.0.0
+ */
+GADatasetCSVFileFormat *
+gadataset_csv_file_format_new(void)
+{
+  auto arrow_csv_format = std::make_shared<arrow::dataset::CsvFileFormat>();
+  std::shared_ptr<arrow::dataset::FileFormat> arrow_format(arrow_csv_format);
+  return GADATASET_CSV_FILE_FORMAT(gadataset_file_format_new_raw(&arrow_format));
+}
+
+
+G_DEFINE_TYPE(GADatasetIPCFileFormat,
+ gadataset_ipc_file_format,
+ GADATASET_TYPE_FILE_FORMAT)
+/* Instance initializer: the subclass adds no state, so the body is empty. */
+static void
+gadataset_ipc_file_format_init(GADatasetIPCFileFormat *object)
+{
+}
+/* Class initializer: nothing is overridden, so the body is empty. */
+static void
+gadataset_ipc_file_format_class_init(GADatasetIPCFileFormatClass *klass)
+{
+}
+
+/**
+ * gadataset_ipc_file_format_new:
+ *
+ * Returns: A newly created #GADatasetIPCFileFormat.
+ *
+ * Since: 3.0.0
+ */
+GADatasetIPCFileFormat *
+gadataset_ipc_file_format_new(void)
+{
+  auto arrow_ipc_format = std::make_shared<arrow::dataset::IpcFileFormat>();
+  std::shared_ptr<arrow::dataset::FileFormat> arrow_format(arrow_ipc_format);
+  return GADATASET_IPC_FILE_FORMAT(gadataset_file_format_new_raw(&arrow_format));
+}
+
+
+G_DEFINE_TYPE(GADatasetParquetFileFormat,
+ gadataset_parquet_file_format,
+ GADATASET_TYPE_FILE_FORMAT)
+/* Instance initializer: the subclass adds no state, so the body is empty. */
+static void
+gadataset_parquet_file_format_init(GADatasetParquetFileFormat *object)
+{
+}
+/* Class initializer: nothing is overridden, so the body is empty. */
+static void
+gadataset_parquet_file_format_class_init(GADatasetParquetFileFormatClass *klass)
+{
+}
+
+/**
+ * gadataset_parquet_file_format_new:
+ *
+ * Returns: A newly created #GADatasetParquetFileFormat.
+ *
+ * Since: 3.0.0
+ */
+GADatasetParquetFileFormat *
+gadataset_parquet_file_format_new(void)
+{
+  auto parquet = std::make_shared<arrow::dataset::ParquetFileFormat>();
+  std::shared_ptr<arrow::dataset::FileFormat> arrow_format(parquet);
+  auto format = gadataset_file_format_new_raw(&arrow_format);
+  return GADATASET_PARQUET_FILE_FORMAT(format);
+}
+
+
+G_END_DECLS
+
+GADatasetFileWriteOptions *
+gadataset_file_write_options_new_raw(
+  std::shared_ptr<arrow::dataset::FileWriteOptions> *arrow_options)
+{
+  /* The pointer travels through the construct-only "options" property. */
+  auto object = g_object_new(GADATASET_TYPE_FILE_WRITE_OPTIONS,
+                             "options", arrow_options, NULL);
+  return GADATASET_FILE_WRITE_OPTIONS(object);
+}
+
+std::shared_ptr<arrow::dataset::FileWriteOptions>
+gadataset_file_write_options_get_raw(GADatasetFileWriteOptions *options)
+{
+  /* Returned by value: the caller gets its own copy of the shared_ptr. */
+  return GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(options)->options;
+}
+
+
+GADatasetFileWriter *
+gadataset_file_writer_new_raw(
+  std::shared_ptr<arrow::dataset::FileWriter> *arrow_writer)
+{
+  /* The pointer travels through the construct-only "writer" property. */
+  return GADATASET_FILE_WRITER(
+    g_object_new(GADATASET_TYPE_FILE_WRITER, "writer", arrow_writer, NULL));
+}
+
+std::shared_ptr<arrow::dataset::FileWriter>
+gadataset_file_writer_get_raw(GADatasetFileWriter *writer)
+{
+  /* Returned by value: the caller gets its own copy of the shared_ptr. */
+  return GADATASET_FILE_WRITER_GET_PRIVATE(writer)->writer;
+}
+
+
+GADatasetFileFormat *
+gadataset_file_format_new_raw(
+  std::shared_ptr<arrow::dataset::FileFormat> *arrow_format)
+{
+  /* Pick the GObject subclass that matches the Arrow type name and fall
+   * back to the base class for any other name. */
+  const auto &name = (*arrow_format)->type_name();
+  const GType type =
+    (name == "csv") ? GADATASET_TYPE_CSV_FILE_FORMAT :
+    (name == "ipc") ? GADATASET_TYPE_IPC_FILE_FORMAT :
+    (name == "parquet") ? GADATASET_TYPE_PARQUET_FILE_FORMAT :
+    GADATASET_TYPE_FILE_FORMAT;
+  auto object = g_object_new(type,
+                             "format", arrow_format,
+                             NULL);
+  return GADATASET_FILE_FORMAT(object);
+}
+
+std::shared_ptr<arrow::dataset::FileFormat>
+gadataset_file_format_get_raw(GADatasetFileFormat *format)
+{
+  /* Returned by value: the caller gets its own copy of the shared_ptr. */
+  return GADATASET_FILE_FORMAT_GET_PRIVATE(format)->format;
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.h b/src/arrow/c_glib/arrow-dataset-glib/file-format.h
new file mode 100644
index 000000000..16a834074
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.h
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+
+#define GADATASET_TYPE_FILE_WRITE_OPTIONS \
+ (gadataset_file_write_options_get_type()) /* GType of GADatasetFileWriteOptions */
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriteOptions,
+ gadataset_file_write_options,
+ GADATASET,
+ FILE_WRITE_OPTIONS,
+ GObject) /* parent type */
+struct _GADatasetFileWriteOptionsClass
+{
+ GObjectClass parent_class; /* only member: the parent class structure */
+};
+
+
+#define GADATASET_TYPE_FILE_WRITER \
+ (gadataset_file_writer_get_type()) /* GType of GADatasetFileWriter */
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriter,
+ gadataset_file_writer,
+ GADATASET,
+ FILE_WRITER,
+ GObject) /* parent type */
+struct _GADatasetFileWriterClass
+{
+ GObjectClass parent_class; /* only member: the parent class structure */
+};
+/* Writer API; each function returns %TRUE on success and %FALSE on error. */
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer,
+ GArrowRecordBatch *record_batch,
+ GError **error);
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer,
+ GArrowRecordBatchReader *reader,
+ GError **error);
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_finish(GADatasetFileWriter *writer,
+ GError **error);
+
+
+#define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileFormat,
+ gadataset_file_format,
+ GADATASET,
+ FILE_FORMAT,
+ GObject) /* parent type */
+struct _GADatasetFileFormatClass
+{
+ GObjectClass parent_class; /* only member: the parent class structure */
+};
+/* The returned string is newly allocated; free it with g_free(). */
+GARROW_AVAILABLE_IN_3_0
+gchar *
+gadataset_file_format_get_type_name(GADatasetFileFormat *format);
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileWriteOptions *
+gadataset_file_format_get_default_write_options(GADatasetFileFormat *format);
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileWriter *
+gadataset_file_format_open_writer(GADatasetFileFormat *format,
+ GArrowOutputStream *destination,
+ GArrowFileSystem *file_system,
+ const gchar *path,
+ GArrowSchema *schema,
+ GADatasetFileWriteOptions *options,
+ GError **error); /* returns %NULL on error */
+/* Compares the content of two file formats. */
+GARROW_AVAILABLE_IN_3_0
+gboolean
+gadataset_file_format_equal(GADatasetFileFormat *format,
+ GADatasetFileFormat *other_format);
+
+
+#define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat,
+ gadataset_csv_file_format,
+ GADATASET,
+ CSV_FILE_FORMAT,
+ GADatasetFileFormat) /* parent type */
+struct _GADatasetCSVFileFormatClass
+{
+ GADatasetFileFormatClass parent_class; /* only member: the parent class structure */
+};
+/* Creates a new CSV file format object. */
+GARROW_AVAILABLE_IN_3_0
+GADatasetCSVFileFormat *gadataset_csv_file_format_new(void);
+
+
+#define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat,
+ gadataset_ipc_file_format,
+ GADATASET,
+ IPC_FILE_FORMAT,
+ GADatasetFileFormat) /* parent type */
+struct _GADatasetIPCFileFormatClass
+{
+ GADatasetFileFormatClass parent_class; /* only member: the parent class structure */
+};
+/* Creates a new IPC file format object. */
+GARROW_AVAILABLE_IN_3_0
+GADatasetIPCFileFormat *gadataset_ipc_file_format_new(void);
+
+
+#define GADATASET_TYPE_PARQUET_FILE_FORMAT \
+ (gadataset_parquet_file_format_get_type()) /* GType of GADatasetParquetFileFormat */
+G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat,
+ gadataset_parquet_file_format,
+ GADATASET,
+ PARQUET_FILE_FORMAT,
+ GADatasetFileFormat) /* parent type */
+struct _GADatasetParquetFileFormatClass
+{
+ GADatasetFileFormatClass parent_class; /* only member: the parent class structure */
+};
+/* Creates a new Parquet file format object. */
+GARROW_AVAILABLE_IN_3_0
+GADatasetParquetFileFormat *gadataset_parquet_file_format_new(void);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp b/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp
new file mode 100644
index 000000000..636dc5c01
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/file-format.h>
+
+GADatasetFileWriteOptions *
+gadataset_file_write_options_new_raw(
+ std::shared_ptr<arrow::dataset::FileWriteOptions> *arrow_options); /* wrap a C++ object */
+std::shared_ptr<arrow::dataset::FileWriteOptions>
+gadataset_file_write_options_get_raw(GADatasetFileWriteOptions *options); /* unwrap it */
+
+/* Same wrap/unwrap pair for arrow::dataset::FileWriter. */
+GADatasetFileWriter *
+gadataset_file_writer_new_raw(
+ std::shared_ptr<arrow::dataset::FileWriter> *arrow_writer);
+std::shared_ptr<arrow::dataset::FileWriter>
+gadataset_file_writer_get_raw(GADatasetFileWriter *writer);
+
+/* Same wrap/unwrap pair for arrow::dataset::FileFormat. */
+GADatasetFileFormat *
+gadataset_file_format_new_raw(
+ std::shared_ptr<arrow::dataset::FileFormat> *arrow_format);
+std::shared_ptr<arrow::dataset::FileFormat>
+gadataset_file_format_get_raw(GADatasetFileFormat *format);
diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp b/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp
new file mode 100644
index 000000000..f2f0cd1c3
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/schema.hpp>
+
+#include <arrow-dataset-glib/fragment.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: fragment
+ * @section_id: fragment
+ * @title: Fragment classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetFragment is a base class for all fragment classes.
+ *
+ * #GADatasetInMemoryFragment is a class for in-memory fragment.
+ *
+ * Since: 4.0.0
+ */
+
+/* arrow::dataset::Fragment */
+
+typedef struct GADatasetFragmentPrivate_ {
+ std::shared_ptr<arrow::dataset::Fragment> fragment; /* wrapped Arrow C++ object */
+} GADatasetFragmentPrivate;
+
+enum {
+ PROP_FRAGMENT = 1, /* ID of the "fragment" property installed in class_init */
+};
+/* Registered as an abstract type: only subclasses can be instantiated. */
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetFragment,
+ gadataset_fragment,
+ G_TYPE_OBJECT)
+/* Returns the GADatasetFragmentPrivate of @obj. */
+#define GADATASET_FRAGMENT_GET_PRIVATE(obj) \
+ static_cast<GADatasetFragmentPrivate *>( \
+ gadataset_fragment_get_instance_private( \
+ GADATASET_FRAGMENT(obj)))
+
+static void
+gadataset_fragment_finalize(GObject *object)
+{
+  /* The shared_ptr is created with placement new in
+   * gadataset_fragment_init(), so destroy it explicitly here. */
+  GADATASET_FRAGMENT_GET_PRIVATE(object)->fragment.~shared_ptr();
+
+  G_OBJECT_CLASS(gadataset_fragment_parent_class)->finalize(object);
+}
+
+static void
+gadataset_fragment_set_property(GObject *object,
+                                guint prop_id,
+                                const GValue *value,
+                                GParamSpec *pspec)
+{
+  /* Store a copy of the shared_ptr that the "fragment" value points at. */
+  switch (prop_id) {
+  case PROP_FRAGMENT: /* NULL (the default when unset) leaves it empty */
+    if (auto raw = g_value_get_pointer(value)) {
+      GADATASET_FRAGMENT_GET_PRIVATE(object)->fragment =
+        *static_cast<std::shared_ptr<arrow::dataset::Fragment> *>(raw);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_fragment_init(GADatasetFragment *object)
+{
+  auto storage = &(GADATASET_FRAGMENT_GET_PRIVATE(object)->fragment);
+  new(storage) std::shared_ptr<arrow::dataset::Fragment>; /* in place */
+}
+
+static void
+gadataset_fragment_class_init(GADatasetFragmentClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_fragment_finalize;
+  gobject_class->set_property = gadataset_fragment_set_property;
+
+  /* Write-only, construct-only: set by the *_new_raw() helpers. */
+  auto spec = g_param_spec_pointer("fragment",
+                                   "Fragment",
+                                   "The raw "
+                                   "std::shared_ptr<arrow::dataset::Fragment> *",
+                                   static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                            G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_FRAGMENT, spec);
+}
+
+/* arrow::dataset::InMemoryFragment */
+
+G_DEFINE_TYPE(GADatasetInMemoryFragment,
+ gadataset_in_memory_fragment,
+ GADATASET_TYPE_FRAGMENT)
+/* Instance initializer: the subclass adds no state, so the body is empty. */
+static void
+gadataset_in_memory_fragment_init(GADatasetInMemoryFragment *object)
+{
+}
+/* Class initializer: nothing is overridden, so the body is empty. */
+static void
+gadataset_in_memory_fragment_class_init(GADatasetInMemoryFragmentClass *klass)
+{
+}
+
+/**
+ * gadataset_in_memory_fragment_new:
+ * @schema: The #GArrowSchema of the fragment.
+ * @record_batches: (array length=n_record_batches):
+ *   (element-type GArrowRecordBatch): The record batches of the fragment.
+ * @n_record_batches: The number of elements in @record_batches.
+ *
+ * Returns: A newly created #GADatasetInMemoryFragment.
+ *
+ * Since: 4.0.0
+ */
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new(GArrowSchema *schema,
+                                 GArrowRecordBatch **record_batches,
+                                 gsize n_record_batches)
+{
+  /* Unwrap the GLib record batches into a vector of Arrow C++ ones. */
+  std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_batches;
+  arrow_batches.reserve(n_record_batches);
+  const auto end = record_batches + n_record_batches;
+  for (auto batch = record_batches; batch != end; ++batch) {
+    arrow_batches.push_back(garrow_record_batch_get_raw(*batch));
+  }
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  auto arrow_fragment = std::make_shared<arrow::dataset::InMemoryFragment>(
+    arrow_schema, arrow_batches);
+  return gadataset_in_memory_fragment_new_raw(&arrow_fragment);
+}
+
+G_END_DECLS
+
+GADatasetFragment *
+gadataset_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment)
+{
+  /* GADATASET_TYPE_FRAGMENT is abstract: use a concrete subclass if known. */
+  auto in_memory =
+    std::dynamic_pointer_cast<arrow::dataset::InMemoryFragment>(*arrow_fragment);
+  auto type = in_memory ? GADATASET_TYPE_IN_MEMORY_FRAGMENT : GADATASET_TYPE_FRAGMENT;
+  return GADATASET_FRAGMENT(g_object_new(type, "fragment", arrow_fragment, NULL));
+}
+
+std::shared_ptr<arrow::dataset::Fragment>
+gadataset_fragment_get_raw(GADatasetFragment *fragment)
+{
+  /* Returned by value: the caller gets its own copy of the shared_ptr. */
+  return GADATASET_FRAGMENT_GET_PRIVATE(fragment)->fragment;
+}
+
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment)
+{
+  /* set_property reads a std::shared_ptr<Fragment> *, so upcast first. */
+  std::shared_ptr<arrow::dataset::Fragment> arrow_base = *arrow_fragment;
+  auto fragment = g_object_new(GADATASET_TYPE_IN_MEMORY_FRAGMENT,
+                               "fragment", &arrow_base, NULL);
+  return GADATASET_IN_MEMORY_FRAGMENT(fragment);
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.h b/src/arrow/c_glib/arrow-dataset-glib/fragment.h
new file mode 100644
index 000000000..9376b6cf3
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.h
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+
+/* arrow::dataset::Fragment */
+
+#define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFragment,
+ gadataset_fragment,
+ GADATASET,
+ FRAGMENT,
+ GObject) /* parent type */
+struct _GADatasetFragmentClass
+{
+ GObjectClass parent_class; /* only member: the parent class structure */
+};
+
+/* arrow::dataset::InMemoryFragment */
+
+#define GADATASET_TYPE_IN_MEMORY_FRAGMENT \
+ (gadataset_in_memory_fragment_get_type()) /* GType of GADatasetInMemoryFragment */
+G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment,
+ gadataset_in_memory_fragment,
+ GADATASET,
+ IN_MEMORY_FRAGMENT,
+ GADatasetFragment) /* parent type */
+struct _GADatasetInMemoryFragmentClass
+{
+ GADatasetFragmentClass parent_class; /* only member: the parent class structure */
+};
+/* Builds a fragment from a schema and an array of record batches. */
+GARROW_AVAILABLE_IN_4_0
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new(GArrowSchema *schema,
+ GArrowRecordBatch **record_batches,
+ gsize n_record_batches); /* length of @record_batches */
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp b/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp
new file mode 100644
index 000000000..904f83653
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/fragment.h>
+
+std::shared_ptr<arrow::dataset::Fragment>
+gadataset_fragment_get_raw(GADatasetFragment *fragment); /* unwrap the C++ object */
+/* Wrap an arrow::dataset::Fragment in a GObject. */
+GADatasetFragment*
+gadataset_fragment_new_raw(
+ std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment);
+/* Wrap an arrow::dataset::InMemoryFragment in a GObject. */
+GADatasetInMemoryFragment*
+gadataset_in_memory_fragment_new_raw(
+ std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment);
diff --git a/src/arrow/c_glib/arrow-dataset-glib/meson.build b/src/arrow/c_glib/arrow-dataset-glib/meson.build
new file mode 100644
index 000000000..0d9b8564e
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/meson.build
@@ -0,0 +1,104 @@
+# -*- indent-tabs-mode: nil -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# C++ implementation files compiled into the arrow-dataset-glib library.
+sources = files(
+ 'dataset-factory.cpp',
+ 'dataset.cpp',
+ 'file-format.cpp',
+ 'fragment.cpp',
+ 'partitioning.cpp',
+ 'scanner.cpp',
+)
+
+# Public C headers. They are also the input for enum generation and for
+# the GObject Introspection scan further down in this file.
+c_headers = files(
+ 'arrow-dataset-glib.h',
+ 'dataset-factory.h',
+ 'dataset.h',
+ 'file-format.h',
+ 'fragment.h',
+ 'partitioning.h',
+ 'scanner.h',
+)
+
+# C++ headers; installed together with the C headers.
+cpp_headers = files(
+ 'arrow-dataset-glib.hpp',
+ 'dataset-factory.hpp',
+ 'dataset.hpp',
+ 'file-format.hpp',
+ 'fragment.hpp',
+ 'partitioning.hpp',
+ 'scanner.hpp',
+)
+
+# Generate enums.c / enums.h from the enum definitions in the public C
+# headers. The generated header is installed into the same
+# 'arrow-dataset-glib' include subdirectory as the other headers because it
+# is included as <arrow-dataset-glib/enums.h>; using meson.project_name()
+# here would place it in the top-level project's include directory instead.
+enums = gnome.mkenums('enums',
+                      sources: c_headers,
+                      identifier_prefix: 'GADataset',
+                      symbol_prefix: 'gadataset',
+                      c_template: 'enums.c.template',
+                      h_template: 'enums.h.template',
+                      install_dir: join_paths(include_dir, 'arrow-dataset-glib'),
+                      install_header: true)
+# mkenums returns [generated source, generated header].
+enums_source = enums[0]
+enums_header = enums[1]
+
+
+# Install both the C and the C++ headers into the 'arrow-dataset-glib'
+# include subdirectory.
+headers = c_headers + cpp_headers
+install_headers(headers, subdir: 'arrow-dataset-glib')
+
+# Libraries that arrow-dataset-glib is built against.
+dependencies = [
+ arrow_dataset,
+ arrow_glib,
+]
+# The library itself; the generated enum files are compiled in as well.
+libarrow_dataset_glib = library('arrow-dataset-glib',
+ sources: sources + enums,
+ install: true,
+ dependencies: dependencies,
+ include_directories: base_include_directories,
+ soversion: so_version,
+ version: library_version)
+# Dependency object for other targets in this build; it lists enums_header
+# in its sources.
+arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib,
+ include_directories: base_include_directories,
+ dependencies: dependencies,
+ sources: enums_header)
+
+# Generate arrow-dataset-glib.pc; it requires arrow-glib and arrow-dataset.
+pkgconfig.generate(libarrow_dataset_glib,
+ filebase: 'arrow-dataset-glib',
+ name: 'Apache Arrow Dataset GLib',
+ description: 'C API for Apache Arrow Dataset based on GLib',
+ version: version,
+ requires: ['arrow-glib', 'arrow-dataset'])
+
+# Generate GObject Introspection data (namespace ArrowDataset) when
+# have_gi is set. The scan includes Arrow-1.0 and points the scanner at the
+# not-yet-installed Arrow-1.0.gir from the arrow-glib directory.
+if have_gi
+ gnome.generate_gir(libarrow_dataset_glib,
+ dependencies: declare_dependency(sources: arrow_glib_gir),
+ sources: sources + c_headers + enums,
+ namespace: 'ArrowDataset',
+ nsversion: api_version,
+ identifier_prefix: 'GADataset',
+ symbol_prefix: 'gadataset',
+ export_packages: 'arrow-dataset-glib',
+ includes: [
+ 'Arrow-1.0',
+ ],
+ install: true,
+ extra_args: [
+ '--warn-all',
+ '--include-uninstalled=./arrow-glib/Arrow-1.0.gir',
+ ])
+endif
diff --git a/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp b/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp
new file mode 100644
index 000000000..bce33671a
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/array.hpp>
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/schema.hpp>
+
+#include <arrow-dataset-glib/enums.h>
+#include <arrow-dataset-glib/partitioning.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: partitioning
+ * @section_id: partitioning
+ * @title: Partitioning classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetPartitioningOptions is a class for partitioning options.
+ *
+ * #GADatasetPartitioning is a base class for partitioning classes
+ * such as #GADatasetDirectoryPartitioning.
+ *
+ * #GADatasetKeyValuePartitioning is a base class for key-value style
+ * partitioning classes such as #GADatasetDirectoryPartitioning.
+ *
+ * #GADatasetDirectoryPartitioning is a class for partitioning that
+ * uses directory structure.
+ *
+ * Since: 6.0.0
+ */
+
+/* Instance-private data of GADatasetPartitioningOptions. Each field backs
+ * the property with the matching name. */
+typedef struct GADatasetPartitioningOptionsPrivate_ {
+ gboolean infer_dictionary;
+ /* Referenced in set_property and released in dispose; may be NULL. */
+ GArrowSchema *schema;
+ GADatasetSegmentEncoding segment_encoding;
+} GADatasetPartitioningOptionsPrivate;
+
+/* Property IDs, numbered from 1. */
+enum {
+ PROP_INFER_DICTIONARY = 1,
+ PROP_SCHEMA,
+ PROP_SEGMENT_ENCODING,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetPartitioningOptions,
+ gadataset_partitioning_options,
+ G_TYPE_OBJECT)
+
+/* Returns the typed private struct of a GADatasetPartitioningOptions. */
+#define GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(obj) \
+ static_cast<GADatasetPartitioningOptionsPrivate *>( \
+ gadataset_partitioning_options_get_instance_private( \
+ GADATASET_PARTITIONING_OPTIONS(obj)))
+
+/* GObject dispose handler: releases the schema reference held by the
+ * options (if any) and then chains up to the parent class. */
+static void
+gadataset_partitioning_options_dispose(GObject *object)
+{
+  auto options_priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object);
+
+  if (options_priv->schema != nullptr) {
+    g_object_unref(options_priv->schema);
+    /* Clear the field so a repeated dispose does not unref again. */
+    options_priv->schema = nullptr;
+  }
+
+  auto parent_class =
+    G_OBJECT_CLASS(gadataset_partitioning_options_parent_class);
+  parent_class->dispose(object);
+}
+
+/* GObject set_property handler for GADatasetPartitioningOptions.
+ *
+ * Stores the new value in the private struct. For "schema" the new object
+ * is referenced before the previously stored one is released; setting the
+ * same object again is a no-op. */
+static void
+gadataset_partitioning_options_set_property(GObject *object,
+                                            guint prop_id,
+                                            const GValue *value,
+                                            GParamSpec *pspec)
+{
+  auto options_priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_INFER_DICTIONARY:
+    options_priv->infer_dictionary = g_value_get_boolean(value);
+    break;
+  case PROP_SCHEMA:
+    {
+      auto new_schema = g_value_get_object(value);
+      if (options_priv->schema != new_schema) {
+        auto previous_schema = options_priv->schema;
+        if (!new_schema) {
+          options_priv->schema = NULL;
+        } else {
+          g_object_ref(new_schema);
+          options_priv->schema = GARROW_SCHEMA(new_schema);
+        }
+        if (previous_schema) {
+          g_object_unref(previous_schema);
+        }
+      }
+    }
+    break;
+  case PROP_SEGMENT_ENCODING:
+    {
+      auto encoding = g_value_get_enum(value);
+      options_priv->segment_encoding =
+        static_cast<GADatasetSegmentEncoding>(encoding);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject get_property handler for GADatasetPartitioningOptions: copies
+ * the requested field of the private struct into @value. */
+static void
+gadataset_partitioning_options_get_property(GObject *object,
+                                            guint prop_id,
+                                            GValue *value,
+                                            GParamSpec *pspec)
+{
+  auto options_priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object);
+
+  if (prop_id == PROP_INFER_DICTIONARY) {
+    g_value_set_boolean(value, options_priv->infer_dictionary);
+  } else if (prop_id == PROP_SCHEMA) {
+    g_value_set_object(value, options_priv->schema);
+  } else if (prop_id == PROP_SEGMENT_ENCODING) {
+    g_value_set_enum(value, options_priv->segment_encoding);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+  }
+}
+
+/* Instance initializer. Empty: no private field is assigned here.
+ * NOTE(review): the properties in class_init are installed without
+ * G_PARAM_CONSTRUCT, so their declared defaults are presumably not written
+ * into the private struct at construction time -- confirm that a freshly
+ * created object reports the intended "segment-encoding" default. */
+static void
+gadataset_partitioning_options_init(GADatasetPartitioningOptions *object)
+{
+}
+
+/* Class initializer: wires up the GObject virtual functions and installs
+ * the "infer-dictionary", "schema" and "segment-encoding" properties. */
+static void
+gadataset_partitioning_options_class_init(
+  GADatasetPartitioningOptionsClass *klass)
+{
+  auto object_class = G_OBJECT_CLASS(klass);
+  object_class->dispose = gadataset_partitioning_options_dispose;
+  object_class->set_property = gadataset_partitioning_options_set_property;
+  object_class->get_property = gadataset_partitioning_options_get_property;
+
+  /* The declared property defaults come from a default-constructed
+   * arrow::dataset::PartitioningFactoryOptions. */
+  arrow::dataset::PartitioningFactoryOptions arrow_defaults;
+
+  /**
+   * GADatasetPartitioningOptions:infer-dictionary:
+   *
+   * When inferring a schema for partition fields, yield dictionary
+   * encoded types instead of plain. This can be more efficient when
+   * materializing virtual columns, and Expressions parsed by the
+   * finished Partitioning will include dictionaries of all unique
+   * inspected values for each field.
+   *
+   * Since: 6.0.0
+   */
+  auto infer_dictionary_spec =
+    g_param_spec_boolean("infer-dictionary",
+                         "Infer dictionary",
+                         "Whether encode partitioned field values as "
+                         "dictionary",
+                         arrow_defaults.infer_dictionary,
+                         static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(object_class,
+                                  PROP_INFER_DICTIONARY,
+                                  infer_dictionary_spec);
+
+  /**
+   * GADatasetPartitioningOptions:schema:
+   *
+   * Optionally, an expected schema can be provided, in which case
+   * inference will only check discovered fields against the schema
+   * and update internal state (such as dictionaries).
+   *
+   * Since: 6.0.0
+   */
+  auto schema_spec =
+    g_param_spec_object("schema",
+                        "Schema",
+                        "Inference will only check discovered fields "
+                        "against the schema and update internal state",
+                        GARROW_TYPE_SCHEMA,
+                        static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(object_class, PROP_SCHEMA, schema_spec);
+
+  /**
+   * GADatasetPartitioningOptions:segment-encoding:
+   *
+   * After splitting a path into components, decode the path
+   * components before parsing according to this scheme.
+   *
+   * Since: 6.0.0
+   */
+  auto segment_encoding_spec =
+    g_param_spec_enum("segment-encoding",
+                      "Segment encoding",
+                      "After splitting a path into components, "
+                      "decode the path components before "
+                      "parsing according to this scheme",
+                      GADATASET_TYPE_SEGMENT_ENCODING,
+                      static_cast<GADatasetSegmentEncoding>(
+                        arrow_defaults.segment_encoding),
+                      static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(object_class,
+                                  PROP_SEGMENT_ENCODING,
+                                  segment_encoding_spec);
+}
+
+/**
+ * gadataset_partitioning_options_new:
+ *
+ * Returns: A newly created #GADatasetPartitioningOptions.
+ *
+ * Since: 6.0.0
+ */
+GADatasetPartitioningOptions *
+gadataset_partitioning_options_new(void)
+{
+  /* No properties are passed at construction time. */
+  auto options = g_object_new(GADATASET_TYPE_PARTITIONING_OPTIONS, NULL);
+  return GADATASET_PARTITIONING_OPTIONS(options);
+}
+
+
+/* Instance-private data of GADatasetPartitioning: the wrapped Arrow C++
+ * partitioning. The shared_ptr is constructed in init and destroyed in
+ * finalize. */
+typedef struct GADatasetPartitioningPrivate_ {
+ std::shared_ptr<arrow::dataset::Partitioning> partitioning;
+} GADatasetPartitioningPrivate;
+
+/* Property IDs, numbered from 1. */
+enum {
+ PROP_PARTITIONING = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetPartitioning,
+ gadataset_partitioning,
+ G_TYPE_OBJECT)
+
+/* Returns the typed private struct of a GADatasetPartitioning. */
+#define GADATASET_PARTITIONING_GET_PRIVATE(obj) \
+ static_cast<GADatasetPartitioningPrivate *>( \
+ gadataset_partitioning_get_instance_private( \
+ GADATASET_PARTITIONING(obj)))
+
+/* GObject finalize handler: explicitly runs the destructor of the
+ * shared_ptr that init placement-constructed, then chains up. */
+static void
+gadataset_partitioning_finalize(GObject *object)
+{
+  using ArrowPartitioningPtr = std::shared_ptr<arrow::dataset::Partitioning>;
+
+  auto partitioning_priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
+  partitioning_priv->partitioning.~ArrowPartitioningPtr();
+
+  auto parent_class = G_OBJECT_CLASS(gadataset_partitioning_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject set_property handler for GADatasetPartitioning.
+ *
+ * "partitioning" carries a pointer to a
+ * std::shared_ptr<arrow::dataset::Partitioning>; the shared pointer is
+ * copied into the instance. A NULL pointer (for example when the object is
+ * constructed without passing the property) leaves the instance's shared
+ * pointer empty instead of being dereferenced. */
+static void
+gadataset_partitioning_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_PARTITIONING:
+    {
+      auto arrow_partitioning =
+        static_cast<std::shared_ptr<arrow::dataset::Partitioning> *>(
+          g_value_get_pointer(value));
+      if (arrow_partitioning) {
+        priv->partitioning = *arrow_partitioning;
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: placement-constructs an empty shared_ptr in the
+ * private struct so that it can be assigned to later. */
+static void
+gadataset_partitioning_init(GADatasetPartitioning *object)
+{
+  using ArrowPartitioningPtr = std::shared_ptr<arrow::dataset::Partitioning>;
+
+  auto partitioning_priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
+  new(&partitioning_priv->partitioning) ArrowPartitioningPtr;
+}
+
+/* Class initializer: registers finalize/set_property and installs the
+ * write-only, construct-only "partitioning" pointer property that carries
+ * the raw Arrow C++ object into a new instance. */
+static void
+gadataset_partitioning_class_init(GADatasetPartitioningClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gadataset_partitioning_finalize;
+  gobject_class->set_property = gadataset_partitioning_set_property;
+
+  GParamSpec *spec;
+  /* The blurb names the actual C++ type, std::shared_ptr. */
+  spec = g_param_spec_pointer("partitioning",
+                              "Partitioning",
+                              "The raw "
+                              "std::shared_ptr<arrow::dataset::Partitioning> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec);
+}
+
+/**
+ * gadataset_partitioning_new:
+ *
+ * Returns: The newly created #GADatasetPartitioning that doesn't
+ *   partition.
+ *
+ * Since: 6.0.0
+ */
+GADatasetPartitioning *
+gadataset_partitioning_new(void)
+{
+  /* Wrap Arrow's default partitioning. */
+  auto arrow_default_partitioning = arrow::dataset::Partitioning::Default();
+  auto partitioning = g_object_new(GADATASET_TYPE_PARTITIONING,
+                                   "partitioning", &arrow_default_partitioning,
+                                   NULL);
+  return GADATASET_PARTITIONING(partitioning);
+}
+
+/**
+ * gadataset_partitioning_get_type_name:
+ * @partitioning: A #GADatasetPartitioning.
+ *
+ * Returns: The type name of @partitioning.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 6.0.0
+ */
+gchar *
+gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning)
+{
+  const auto arrow_partitioning = gadataset_partitioning_get_raw(partitioning);
+  const auto type_name = arrow_partitioning->type_name();
+  /* Hand a freshly allocated copy to the caller. */
+  return g_strndup(type_name.c_str(), type_name.size());
+}
+
+
+/* GADatasetKeyValuePartitioning derives from GADatasetPartitioning. It
+ * adds no instance or class state, so both initializers are empty. */
+G_DEFINE_TYPE(GADatasetKeyValuePartitioning,
+ gadataset_key_value_partitioning,
+ GADATASET_TYPE_PARTITIONING)
+
+static void
+gadataset_key_value_partitioning_init(GADatasetKeyValuePartitioning *object)
+{
+}
+
+static void
+gadataset_key_value_partitioning_class_init(
+ GADatasetKeyValuePartitioningClass *klass)
+{
+}
+
+
+/* GADatasetDirectoryPartitioning derives from
+ * GADatasetKeyValuePartitioning. It adds no instance or class state, so
+ * both initializers are empty. */
+G_DEFINE_TYPE(GADatasetDirectoryPartitioning,
+ gadataset_directory_partitioning,
+ GADATASET_TYPE_KEY_VALUE_PARTITIONING)
+
+static void
+gadataset_directory_partitioning_init(GADatasetDirectoryPartitioning *object)
+{
+}
+
+static void
+gadataset_directory_partitioning_class_init(
+ GADatasetDirectoryPartitioningClass *klass)
+{
+}
+
+/**
+ * gadataset_directory_partitioning_new:
+ * @schema: A #GArrowSchema that describes all partitioned segments.
+ * @dictionaries: (nullable) (element-type GArrowArray): A list of #GArrowArray
+ *   for dictionary data types in @schema.
+ * @options: (nullable): A #GADatasetPartitioningOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: The newly created #GADatasetDirectoryPartitioning on success,
+ *   %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GADatasetDirectoryPartitioning *
+gadataset_directory_partitioning_new(GArrowSchema *schema,
+                                     GList *dictionaries,
+                                     GADatasetPartitioningOptions *options,
+                                     GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Convert the GList into a vector of Arrow arrays; a NULL element is
+   * kept as a null entry. */
+  std::vector<std::shared_ptr<arrow::Array>> arrow_dictionaries;
+  GList *node = dictionaries;
+  while (node) {
+    auto dictionary = GARROW_ARRAY(node->data);
+    if (!dictionary) {
+      arrow_dictionaries.push_back(nullptr);
+    } else {
+      arrow_dictionaries.push_back(garrow_array_get_raw(dictionary));
+    }
+    node = node->next;
+  }
+
+  /* Without @options the default-constructed Arrow options are used. */
+  arrow::dataset::KeyValuePartitioningOptions arrow_options;
+  if (options) {
+    arrow_options =
+      gadataset_partitioning_options_get_raw_key_value_partitioning_options(
+        options);
+  }
+
+  auto arrow_partitioning =
+    std::make_shared<arrow::dataset::DirectoryPartitioning>(
+      arrow_schema,
+      arrow_dictionaries,
+      arrow_options);
+  auto partitioning = g_object_new(GADATASET_TYPE_DIRECTORY_PARTITIONING,
+                                   "partitioning", &arrow_partitioning,
+                                   NULL);
+  return GADATASET_DIRECTORY_PARTITIONING(partitioning);
+}
+
+
+G_END_DECLS
+
+/* Builds an arrow::dataset::KeyValuePartitioningOptions from @options.
+ * Only the segment encoding is copied over. */
+arrow::dataset::KeyValuePartitioningOptions
+gadataset_partitioning_options_get_raw_key_value_partitioning_options(
+  GADatasetPartitioningOptions *options)
+{
+  arrow::dataset::KeyValuePartitioningOptions arrow_options;
+  auto options_priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(options);
+  const auto arrow_segment_encoding =
+    static_cast<arrow::dataset::SegmentEncoding>(
+      options_priv->segment_encoding);
+  arrow_options.segment_encoding = arrow_segment_encoding;
+  return arrow_options;
+}
+
+/* Returns a copy of the shared pointer to the Arrow C++ partitioning that
+ * @partitioning wraps. */
+std::shared_ptr<arrow::dataset::Partitioning>
+gadataset_partitioning_get_raw(GADatasetPartitioning *partitioning)
+{
+  auto partitioning_priv = GADATASET_PARTITIONING_GET_PRIVATE(partitioning);
+  std::shared_ptr<arrow::dataset::Partitioning> arrow_partitioning =
+    partitioning_priv->partitioning;
+  return arrow_partitioning;
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/partitioning.h b/src/arrow/c_glib/arrow-dataset-glib/partitioning.h
new file mode 100644
index 000000000..d408d9bd5
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.h
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+
+/**
+ * GADatasetSegmentEncoding:
+ * @GADATASET_SEGMENT_ENCODING_NONE: No encoding.
+ * @GADATASET_SEGMENT_ENCODING_URI: Segment values are URL-encoded.
+ *
+ * They correspond to `arrow::dataset::SegmentEncoding` values.
+ *
+ * Since: 6.0.0
+ */
+typedef enum {
+ GADATASET_SEGMENT_ENCODING_NONE,
+ GADATASET_SEGMENT_ENCODING_URI,
+} GADatasetSegmentEncoding;
+
+
+/* Type declaration for GADatasetPartitioningOptions, a derivable GObject
+ * subclass. The class structure has no members beyond the parent class. */
+#define GADATASET_TYPE_PARTITIONING_OPTIONS \
+ (gadataset_partitioning_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioningOptions,
+ gadataset_partitioning_options,
+ GADATASET,
+ PARTITIONING_OPTIONS,
+ GObject)
+struct _GADatasetPartitioningOptionsClass
+{
+ GObjectClass parent_class;
+};
+
+/* Constructor; takes no arguments. */
+GARROW_AVAILABLE_IN_6_0
+GADatasetPartitioningOptions *
+gadataset_partitioning_options_new(void);
+
+
+/* Type declaration for GADatasetPartitioning, a derivable GObject subclass
+ * and the parent type of the key-value partitioning classes below. */
+#define GADATASET_TYPE_PARTITIONING (gadataset_partitioning_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioning,
+ gadataset_partitioning,
+ GADATASET,
+ PARTITIONING,
+ GObject)
+struct _GADatasetPartitioningClass
+{
+ GObjectClass parent_class;
+};
+
+/* Constructor; takes no arguments. */
+GARROW_AVAILABLE_IN_6_0
+GADatasetPartitioning *
+gadataset_partitioning_new(void);
+/* Returns the type name as a gchar * (non-const). */
+GARROW_AVAILABLE_IN_6_0
+gchar *
+gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning);
+
+
+/* Type declaration for GADatasetKeyValuePartitioning, a derivable subclass
+ * of GADatasetPartitioning. No functions are declared for it here. */
+#define GADATASET_TYPE_KEY_VALUE_PARTITIONING \
+ (gadataset_key_value_partitioning_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioning,
+ gadataset_key_value_partitioning,
+ GADATASET,
+ KEY_VALUE_PARTITIONING,
+ GADatasetPartitioning)
+struct _GADatasetKeyValuePartitioningClass
+{
+ GADatasetPartitioningClass parent_class;
+};
+
+
+/* Type declaration for GADatasetDirectoryPartitioning, a derivable
+ * subclass of GADatasetKeyValuePartitioning. */
+#define GADATASET_TYPE_DIRECTORY_PARTITIONING \
+ (gadataset_directory_partitioning_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDirectoryPartitioning,
+ gadataset_directory_partitioning,
+ GADATASET,
+ DIRECTORY_PARTITIONING,
+ GADatasetKeyValuePartitioning)
+struct _GADatasetDirectoryPartitioningClass
+{
+ GADatasetKeyValuePartitioningClass parent_class;
+};
+
+/* Constructor taking a schema, a list of dictionaries, options and a
+ * GError return location. */
+GARROW_AVAILABLE_IN_6_0
+GADatasetDirectoryPartitioning *
+gadataset_directory_partitioning_new(GArrowSchema *schema,
+ GList *dictionaries,
+ GADatasetPartitioningOptions *options,
+ GError **error);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp b/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp
new file mode 100644
index 000000000..2481ecb33
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/partitioning.h>
+
+/* C++-only helpers that expose the Arrow C++ objects behind the GObject
+ * partitioning wrappers. */
+
+/* Returns Arrow key-value partitioning options built from @options. */
+arrow::dataset::KeyValuePartitioningOptions
+gadataset_partitioning_options_get_raw_key_value_partitioning_options(
+ GADatasetPartitioningOptions *options);
+
+/* Returns the Arrow partitioning shared pointer for @partitioning. */
+std::shared_ptr<arrow::dataset::Partitioning>
+gadataset_partitioning_get_raw(GADatasetPartitioning *partitioning);
diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
new file mode 100644
index 000000000..51542bb0a
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/expression.hpp>
+#include <arrow-glib/reader.hpp>
+#include <arrow-glib/table.hpp>
+
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/scanner.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: scanner
+ * @section_id: scanner
+ * @title: Scanner related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetScanner is a class for scanning a dataset.
+ *
+ * #GADatasetScannerBuilder is a class for building a scanner.
+ *
+ * Since: 5.0.0
+ */
+
+/* Instance-private data of GADatasetScanner: the wrapped Arrow C++
+ * scanner. The shared_ptr is constructed in init and destroyed in
+ * finalize. */
+typedef struct GADatasetScannerPrivate_ {
+ std::shared_ptr<arrow::dataset::Scanner> scanner;
+} GADatasetScannerPrivate;
+
+/* Property IDs, numbered from 1. */
+enum {
+ PROP_SCANNER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanner,
+ gadataset_scanner,
+ G_TYPE_OBJECT)
+
+/* Returns the typed private struct of a GADatasetScanner. */
+#define GADATASET_SCANNER_GET_PRIVATE(obj) \
+ static_cast<GADatasetScannerPrivate *>( \
+ gadataset_scanner_get_instance_private( \
+ GADATASET_SCANNER(obj)))
+
+/* GObject finalize handler: explicitly runs the destructor of the
+ * shared_ptr that init placement-constructed, then chains up. */
+static void
+gadataset_scanner_finalize(GObject *object)
+{
+  using ArrowScannerPtr = std::shared_ptr<arrow::dataset::Scanner>;
+
+  auto scanner_priv = GADATASET_SCANNER_GET_PRIVATE(object);
+  scanner_priv->scanner.~ArrowScannerPtr();
+
+  auto parent_class = G_OBJECT_CLASS(gadataset_scanner_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject set_property handler for GADatasetScanner.
+ *
+ * "scanner" carries a pointer to a
+ * std::shared_ptr<arrow::dataset::Scanner>; the shared pointer is copied
+ * into the instance. A NULL pointer (for example when the object is
+ * constructed without passing the property) leaves the instance's shared
+ * pointer empty instead of being dereferenced. */
+static void
+gadataset_scanner_set_property(GObject *object,
+                               guint prop_id,
+                               const GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_SCANNER:
+    {
+      auto arrow_scanner =
+        static_cast<std::shared_ptr<arrow::dataset::Scanner> *>(
+          g_value_get_pointer(value));
+      if (arrow_scanner) {
+        priv->scanner = *arrow_scanner;
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: placement-constructs an empty shared_ptr in the
+ * private struct so that it can be assigned to later. */
+static void
+gadataset_scanner_init(GADatasetScanner *object)
+{
+  using ArrowScannerPtr = std::shared_ptr<arrow::dataset::Scanner>;
+
+  auto scanner_priv = GADATASET_SCANNER_GET_PRIVATE(object);
+  new(&scanner_priv->scanner) ArrowScannerPtr;
+}
+
+/* Class initializer: registers finalize/set_property and installs the
+ * write-only, construct-only "scanner" pointer property that carries the
+ * raw Arrow C++ object into a new instance. */
+static void
+gadataset_scanner_class_init(GADatasetScannerClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_scanner_finalize;
+  gobject_class->set_property = gadataset_scanner_set_property;
+
+  GParamSpec *spec;
+  /* The blurb names the actual C++ type, std::shared_ptr. */
+  spec = g_param_spec_pointer("scanner",
+                              "Scanner",
+                              "The raw std::shared_ptr<arrow::dataset::Scanner> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_SCANNER, spec);
+}
+
+/**
+ * gadataset_scanner_to_table:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GArrowTable on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowTable *
+gadataset_scanner_to_table(GADatasetScanner *scanner,
+                           GError **error)
+{
+  auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+  auto arrow_table_result = arrow_scanner->ToTable();
+  /* Bail out early when the check on the result fails. */
+  if (!garrow::check(error, arrow_table_result, "[scanner][to-table]")) {
+    return NULL;
+  }
+  auto arrow_table = *arrow_table_result;
+  return garrow_table_new_raw(&arrow_table);
+}
+
+
+/* Instance-private data of GADatasetScannerBuilder: the wrapped Arrow C++
+ * scanner builder. The shared_ptr is constructed in init and destroyed in
+ * finalize. */
+typedef struct GADatasetScannerBuilderPrivate_ {
+ std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder;
+} GADatasetScannerBuilderPrivate;
+
+/* Property IDs, numbered from 1. */
+enum {
+ PROP_SCANNER_BUILDER = 1,
+ PROP_USE_ASYNC,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScannerBuilder,
+ gadataset_scanner_builder,
+ G_TYPE_OBJECT)
+
+/* Returns the typed private struct of a GADatasetScannerBuilder. */
+#define GADATASET_SCANNER_BUILDER_GET_PRIVATE(obj) \
+ static_cast<GADatasetScannerBuilderPrivate *>( \
+ gadataset_scanner_builder_get_instance_private( \
+ GADATASET_SCANNER_BUILDER(obj)))
+
+/* GObject finalize handler: explicitly runs the destructor of the
+ * shared_ptr that init placement-constructed, then chains up. */
+static void
+gadataset_scanner_builder_finalize(GObject *object)
+{
+  using ArrowScannerBuilderPtr =
+    std::shared_ptr<arrow::dataset::ScannerBuilder>;
+
+  auto builder_priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+  builder_priv->scanner_builder.~ArrowScannerBuilderPtr();
+
+  auto parent_class = G_OBJECT_CLASS(gadataset_scanner_builder_parent_class);
+  parent_class->finalize(object);
+}
+
+/* GObject set_property handler for GADatasetScannerBuilder.
+ *
+ * "scanner-builder" carries a pointer to a
+ * std::shared_ptr<arrow::dataset::ScannerBuilder>; the shared pointer is
+ * copied into the instance. A NULL pointer (for example when the object is
+ * constructed without passing the property) leaves the instance's shared
+ * pointer empty instead of being dereferenced.
+ *
+ * "use-async" is forwarded to the wrapped builder. There is no GError to
+ * report into from a property setter, so nullptr is passed as the error
+ * location. The call is skipped when no builder has been set. */
+static void
+gadataset_scanner_builder_set_property(GObject *object,
+                                       guint prop_id,
+                                       const GValue *value,
+                                       GParamSpec *pspec)
+{
+  auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_SCANNER_BUILDER:
+    {
+      auto arrow_scanner_builder =
+        static_cast<std::shared_ptr<arrow::dataset::ScannerBuilder> *>(
+          g_value_get_pointer(value));
+      if (arrow_scanner_builder) {
+        priv->scanner_builder = *arrow_scanner_builder;
+      }
+    }
+    break;
+  case PROP_USE_ASYNC:
+    if (priv->scanner_builder) {
+      garrow::check(nullptr,
+                    priv->scanner_builder->UseAsync(g_value_get_boolean(value)),
+                    "[scanner-builder][use-async][set]");
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: placement-constructs an empty shared_ptr in the
+ * private struct so that it can be assigned to later. */
+static void
+gadataset_scanner_builder_init(GADatasetScannerBuilder *object)
+{
+  using ArrowScannerBuilderPtr =
+    std::shared_ptr<arrow::dataset::ScannerBuilder>;
+
+  auto builder_priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+  new(&builder_priv->scanner_builder) ArrowScannerBuilderPtr;
+}
+
+/* Class initializer: registers finalize/set_property and installs the
+ * construct-only "scanner-builder" pointer property and the write-only
+ * "use-async" boolean property. */
+static void
+gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = gadataset_scanner_builder_finalize;
+  gobject_class->set_property = gadataset_scanner_builder_set_property;
+
+  GParamSpec *spec;
+  /* The blurb names the actual C++ type, std::shared_ptr. */
+  spec = g_param_spec_pointer("scanner-builder",
+                              "Scanner builder",
+                              "The raw "
+                              "std::shared_ptr<arrow::dataset::ScannerBuilder> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_SCANNER_BUILDER, spec);
+
+  /* The declared default comes from a default-constructed ScanOptions. */
+  arrow::dataset::ScanOptions default_options;
+  /**
+   * GADatasetScannerBuilder:use-async:
+   *
+   * Whether or not async mode is used.
+   *
+   * Since: 6.0.0
+   */
+  spec = g_param_spec_boolean("use-async",
+                              "Use async",
+                              "Whether or not async mode is used",
+                              default_options.use_async,
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE));
+  g_object_class_install_property(gobject_class, PROP_USE_ASYNC, spec);
+}
+
+/**
+ * gadataset_scanner_builder_new:
+ * @dataset: A #GADatasetDataset to be scanned.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GADatasetScannerBuilder on success,
+ *   %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error)
+{
+  auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  auto arrow_builder_result = arrow_dataset->NewScan();
+  /* Bail out early when the check on the result fails. */
+  if (!garrow::check(error,
+                     arrow_builder_result,
+                     "[scanner-builder][new]")) {
+    return NULL;
+  }
+  auto arrow_builder = *arrow_builder_result;
+  return gadataset_scanner_builder_new_raw(&arrow_builder);
+}
+
+/**
+ * gadataset_scanner_builder_new_record_batch_reader:
+ * @reader: A #GArrowRecordBatchReader that produces record batches.
+ *
+ * Returns: (nullable): A newly created #GADatasetScannerBuilder.
+ *
+ * Since: 6.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_record_batch_reader(
+  GArrowRecordBatchReader *reader)
+{
+  /* Build the Arrow scanner builder directly on top of the raw reader. */
+  auto arrow_builder =
+    arrow::dataset::ScannerBuilder::FromRecordBatchReader(
+      garrow_record_batch_reader_get_raw(reader));
+  return gadataset_scanner_builder_new_raw(&arrow_builder);
+}
+
+/**
+ * gadataset_scanner_builder_set_filter:
+ * @builder: A #GADatasetScannerBuilder.
+ * @expression: A #GArrowExpression to filter rows with.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 6.0.0
+ */
+gboolean
+gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder,
+                                     GArrowExpression *expression,
+                                     GError **error)
+{
+  auto arrow_scanner_builder = gadataset_scanner_builder_get_raw(builder);
+  auto arrow_filter = garrow_expression_get_raw(expression);
+  /* Apply the filter, then translate the outcome into @error. */
+  auto status = arrow_scanner_builder->Filter(*arrow_filter);
+  return garrow::check(error, status, "[scanner-builder][filter][set]");
+}
+
+/**
+ * gadataset_scanner_builder_finish:
+ * @builder: A #GADatasetScannerBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetScanner on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScanner *
+gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder,
+                                 GError **error)
+{
+  auto arrow_scanner_builder = gadataset_scanner_builder_get_raw(builder);
+  auto arrow_scanner_result = arrow_scanner_builder->Finish();
+  /* Bail out early when the check on the result fails. */
+  if (!garrow::check(error, arrow_scanner_result, "[scanner-builder][finish]")) {
+    return NULL;
+  }
+  auto arrow_scanner = *arrow_scanner_result;
+  return gadataset_scanner_new_raw(&arrow_scanner);
+}
+
+
+G_END_DECLS
+
+/* Creates a GADatasetScanner that wraps *@arrow_scanner. The pointer is
+ * passed through the construct-only "scanner" property. */
+GADatasetScanner *
+gadataset_scanner_new_raw(
+  std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner)
+{
+  return GADATASET_SCANNER(g_object_new(GADATASET_TYPE_SCANNER,
+                                        "scanner", arrow_scanner,
+                                        NULL));
+}
+
+/* Returns a copy of the shared pointer to the Arrow C++ scanner that
+ * @scanner wraps. */
+std::shared_ptr<arrow::dataset::Scanner>
+gadataset_scanner_get_raw(GADatasetScanner *scanner)
+{
+  auto scanner_priv = GADATASET_SCANNER_GET_PRIVATE(scanner);
+  std::shared_ptr<arrow::dataset::Scanner> arrow_scanner =
+    scanner_priv->scanner;
+  return arrow_scanner;
+}
+
+/* Creates a GADatasetScannerBuilder that wraps *@arrow_scanner_builder.
+ * The pointer is passed through the construct-only "scanner-builder"
+ * property. */
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_raw(
+  std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder)
+{
+  auto builder = g_object_new(GADATASET_TYPE_SCANNER_BUILDER,
+                              "scanner-builder", arrow_scanner_builder,
+                              NULL);
+  return GADATASET_SCANNER_BUILDER(builder);
+}
+
+/* Returns a copy of the shared pointer to the Arrow C++ scanner builder
+ * that @scanner_builder wraps. */
+std::shared_ptr<arrow::dataset::ScannerBuilder>
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder)
+{
+  auto builder_priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(scanner_builder);
+  std::shared_ptr<arrow::dataset::ScannerBuilder> arrow_scanner_builder =
+    builder_priv->scanner_builder;
+  return arrow_scanner_builder;
+}
diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.h b/src/arrow/c_glib/arrow-dataset-glib/scanner.h
new file mode 100644
index 000000000..59da2577d
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.h
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/dataset.h>
+#include <arrow-dataset-glib/fragment.h>
+
+G_BEGIN_DECLS
+
+#define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) /* GType of GADatasetScanner */
+G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, /* instance type name */
+ gadataset_scanner, /* prefix of the type's functions */
+ GADATASET, /* module part of the generated macro names */
+ SCANNER, /* object part of the generated macro names */
+ GObject) /* parent type */
+struct _GADatasetScannerClass /* class structure required for a derivable type */
+{
+ GObjectClass parent_class; /* parent class structure; the only member */
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowTable *
+gadataset_scanner_to_table(GADatasetScanner *scanner,
+ GError **error);
+
+#define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) /* GType of GADatasetScannerBuilder */
+G_DECLARE_DERIVABLE_TYPE(GADatasetScannerBuilder, /* instance type name */
+ gadataset_scanner_builder, /* prefix of the type's functions */
+ GADATASET, /* module part of the generated macro names */
+ SCANNER_BUILDER, /* object part of the generated macro names */
+ GObject) /* parent type */
+struct _GADatasetScannerBuilderClass /* class structure required for a derivable type */
+{
+ GObjectClass parent_class; /* parent class structure; the only member */
+};
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new(GADatasetDataset *dataset,
+ GError **error);
+GARROW_AVAILABLE_IN_6_0
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_record_batch_reader(
+ GArrowRecordBatchReader *reader);
+
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder,
+ GArrowExpression *expression,
+ GError **error);
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetScanner *
+gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder,
+ GError **error);
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp b/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp
new file mode 100644
index 000000000..663ab6fc4
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/fragment.h>
+#include <arrow-dataset-glib/scanner.h>
+
+GADatasetScanner *
+gadataset_scanner_new_raw(
+ std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner);
+std::shared_ptr<arrow::dataset::Scanner>
+gadataset_scanner_get_raw(GADatasetScanner *scanner);
+
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_raw(
+ std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder);
+std::shared_ptr<arrow::dataset::ScannerBuilder>
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder);