diff options
Diffstat (limited to 'src/arrow/c_glib')
472 files changed, 83719 insertions, 0 deletions
diff --git a/src/arrow/c_glib/.gitignore b/src/arrow/c_glib/.gitignore new file mode 100644 index 000000000..0c813e8d3 --- /dev/null +++ b/src/arrow/c_glib/.gitignore @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +Makefile +Makefile.in +.deps/ +.libs/ +*.gir +*.typelib +*.o +*.lo +*.la +*~ +/LICENSE.txt +/*.tar.gz +/aclocal.m4 +/autom4te.cache/ +/config.h +/config.h.in +/config.log +/config.status +/config/ +/configure +/doc/*-glib/*.txt +/doc/*-glib/*.txt.bak +/doc/*-glib/*.args +/doc/*-glib/*.hierarchy +/doc/*-glib/*.interfaces +/doc/*-glib/*.prerequisites +/doc/*-glib/*.signals +/doc/*-glib/*.types +/doc/*-glib/entities.xml +/doc/*-glib/*.stamp +/doc/*-glib/html/ +/doc/*-glib/xml/ +/doc/*-glib/tmpl/ +/libtool +/m4/ +/stamp-h1 +/arrow-cuda-glib/*.pc +/*-glib/enums.c +/*-glib/enums.h +/*-glib/stamp-* +/arrow-glib/version.h +/arrow-glib/*.pc +/gandiva-glib/version.h +/gandiva-glib/*.pc +/parquet-glib/version.h +/parquet-glib/*.pc +/plasma-glib/*.pc +/example/build +/example/extension-type +/example/read-batch +/example/read-stream +/gtk-doc.make +/build/ diff --git a/src/arrow/c_glib/Brewfile b/src/arrow/c_glib/Brewfile new file mode 100644 index 000000000..ba65853a7 --- /dev/null +++ b/src/arrow/c_glib/Brewfile 
@@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +brew "autoconf-archive" +brew "gobject-introspection" +brew "gtk-doc" +brew "libtool" +brew "meson" diff --git a/src/arrow/c_glib/Gemfile b/src/arrow/c_glib/Gemfile new file mode 100644 index 000000000..015f962de --- /dev/null +++ b/src/arrow/c_glib/Gemfile @@ -0,0 +1,23 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +source "https://rubygems.org/" + +gem "test-unit" +gem "gobject-introspection", ">= 3.4.9" diff --git a/src/arrow/c_glib/README.md b/src/arrow/c_glib/README.md new file mode 100644 index 000000000..ac179354d --- /dev/null +++ b/src/arrow/c_glib/README.md @@ -0,0 +1,315 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Arrow GLib + +Arrow GLib is a wrapper library for [Arrow +C++](https://github.com/apache/arrow/tree/master/cpp). Arrow GLib +provides C API. + +Arrow GLib supports [GObject +Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection). +It means that you can create language bindings at runtime or compile +time. + +For example, you can use Apache Arrow from Ruby by Arrow GLib and +[gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) +with the following code: + +```ruby +# Generate bindings at runtime +require "gi" +Arrow = GI.load("Arrow") + +# Now, you can access arrow::BooleanArray in Arrow C++ by +# Arrow::BooleanArray +p Arrow::BooleanArray +``` + +In Ruby case, you should use +[red-arrow gem](https://rubygems.org/gems/red-arrow). It's based on +gobject-introspection gem. It adds many convenient features to raw +gobject-introspection gem based bindings. 
+ +## Install + +You can use packages or build by yourself to install Arrow GLib. It's +recommended that you use packages. + +We use Meson and Ninja as build tools. If you find problems when +installing please see [common build +problems](https://github.com/apache/arrow/blob/master/c_glib/README.md#common-build-problems). + +### Packages + +See [install document](https://arrow.apache.org/install/) for details. + +### How to build by users + +Arrow GLib users should use released source archive to build Arrow +GLib (replace the version number in the following commands with the +one you use): + +```console +% wget https://downloads.apache.org/arrow/arrow-3.0.0/apache-arrow-3.0.0.tar.gz +% tar xf apache-arrow-3.0.0.tar.gz +% cd apache-arrow-3.0.0 +``` + +You need to build and install Arrow C++ before you build and install +Arrow GLib. See Arrow C++ document about how to install Arrow C++. + +If you use macOS with [Homebrew](https://brew.sh/), you must install +required packages. + +macOS: + +```console +$ brew bundle --file=c_glib/Brewfile +$ meson setup c_glib.build c_glib --buildtype=release +$ meson compile -C c_glib.build +$ sudo meson install -C c_glib.build +``` + +Others: + +```console +$ meson setup c_glib.build c_glib --buildtype=release +$ meson compile -C c_glib.build +$ sudo meson install -C c_glib.build +``` + +### How to build by developers + +You need to install Arrow C++ before you install Arrow GLib. See Arrow +C++ document about how to install Arrow C++. + +You need [GTK-Doc](https://www.gtk.org/gtk-doc/) and +[GObject Introspection](https://wiki.gnome.org/Projects/GObjectIntrospection) +to build Arrow GLib. 
You can install them as follows: + +On Debian GNU/Linux or Ubuntu: + +```console +$ sudo apt install -y -V gtk-doc-tools libgirepository1.0-dev meson ninja-build +``` + +On CentOS 7: + +```console +$ sudo yum install -y gtk-doc gobject-introspection-devel ninja-build +$ sudo pip3 install meson +``` + +On CentOS 8 or later: + +```console +$ sudo dnf install -y --enablerepo=powertools gtk-doc gobject-introspection-devel ninja-build +$ sudo pip3 install meson +``` + +On macOS with [Homebrew](https://brew.sh/): + +```console +$ brew bundle --file=c_glib/Brewfile +``` + +You can build and install Arrow GLib as follows: + +macOS: + +```console +$ export XML_CATALOG_FILES="$(brew --prefix)/etc/xml/catalog" +$ meson setup c_glib.build c_glib -Dgtk_doc=true +$ meson compile -C c_glib.build +$ sudo meson install -C c_glib.build +``` + +Others: + +```console +$ meson setup c_glib.build c_glib -Dgtk_doc=true +$ meson compile -C c_glib.build +$ sudo meson install -C c_glib.build +``` + +## Usage + +You can use Arrow GLib with C or other languages. If you use Arrow +GLib with C, you use the C API. If you use Arrow GLib with other +languages, you use GObject Introspection based bindings. + +### C + +You can find API reference in the +`/usr/local/share/gtk-doc/html/arrow-glib/` directory. If you specify +`--prefix` to `meson`, the directory will be different. + +You can find example code in the `example/` directory. + +### Language bindings + +You can use Arrow GLib with non-C languages with GObject Introspection +based bindings. Here are languages that support GObject Introspection: + + * Ruby: [red-arrow gem](https://rubygems.org/gems/red-arrow) should be used. + * Examples: https://github.com/red-data-tools/red-arrow/tree/master/example + + * Python: [PyGObject](https://wiki.gnome.org/Projects/PyGObject) should be used. (Note that you should prefer PyArrow over Arrow GLib.) + + * Lua: [LGI](https://github.com/pavouk/lgi) should be used. + * Examples: `example/lua/` directory. 
+ + * Go: [Go-gir-generator](https://github.com/linuxdeepin/go-gir-generator) should be used. (Note that you should use Apache Arrow for Go rather than Arrow GLib.) + +See also +[Projects/GObjectIntrospection/Users - GNOME Wiki!](https://wiki.gnome.org/Projects/GObjectIntrospection/Users) +for other languages. + +## How to run test + +Arrow GLib has unit tests. You can confirm that you installed Arrow +GLib correctly by running unit tests. + +You need to install the following to run unit tests: + + * [Ruby](https://www.ruby-lang.org/) + * [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) + * [test-unit gem](https://rubygems.org/gems/test-unit) + +You can install them as follows: + +On Debian GNU/Linux or Ubuntu: + +```console +$ sudo apt install -y -V ruby-dev +$ sudo gem install bundler +$ (cd c_glib && bundle install) +``` + +On CentOS 7 or later: + +```console +$ sudo yum install -y git +$ git clone https://github.com/sstephenson/rbenv.git ~/.rbenv +$ git clone https://github.com/sstephenson/ruby-build.git ~/.rbenv/plugins/ruby-build +$ echo 'export PATH="$HOME/.rbenv/bin:$PATH"' >> ~/.bash_profile +$ echo 'eval "$(rbenv init -)"' >> ~/.bash_profile +$ exec ${SHELL} --login +$ sudo yum install -y gcc make patch openssl-devel readline-devel zlib-devel +$ latest_ruby_version=$(rbenv install --list 2>&1 | grep '^[0-9]' | tail -n1) +$ rbenv install ${latest_ruby_version} +$ rbenv global ${latest_ruby_version} +$ gem install bundler +$ (cd c_glib && bundle install) +``` + +On macOS with [Homebrew](https://brew.sh/): + +```console +$ (cd c_glib && bundle install) +``` + +Now, you can run unit tests as follows: + +```console +$ cd c_glib.build +$ BUNDLE_GEMFILE=../c_glib/Gemfile bundle exec ../c_glib/test/run-test.sh +``` + +## Common build problems + +### build failed - /usr/bin/ld: cannot find -larrow + +Arrow C++ must be installed to build Arrow GLib. Run `make install` on +Arrow C++ build directory. In addition, on Linux, you may need to run +`sudo ldconfig`. 
+ +### build failed - unable to load http://docbook.sourceforge.net/release/xsl/current/html/chunk.xsl + +You need to set the following environment variable on macOS: + +```console +$ export XML_CATALOG_FILES="$(brew --prefix)/etc/xml/catalog" +``` + +### build failed - Symbol not found, referenced from `libsource-highlight.4.dylib` + +You may get the following error on macOS: + + +```text +dyld: Symbol not found: __ZN5boost16re_detail_10650112perl_matcherIPKcNSt3__19allocatorINS_9sub_matchIS3_EEEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE14construct_initERKNS_11basic_regexIcSC_EENS_15regex_constants12_match_flagsE + Referenced from: /usr/local/Cellar/source-highlight/3.1.8_7/lib/libsource-highlight.4.dylib + Expected in: flat namespace + in /usr/local/Cellar/source-highlight/3.1.8_7/lib/libsource-highlight.4.dylib +``` + +To fix this error, you need to upgrade `source-highlight`: + +```console +$ brew upgrade source-highlight +``` + +### test failed - Failed to load shared library '...' referenced by the typelib: dlopen(...): dependent dylib '@rpath/...' not found for '...'. relative file paths not allowed '@rpath/...' + +You may get the following error on macOS by running test: + +```text +(NULL)-WARNING **: Failed to load shared library '/usr/local/lib/libparquet-glib.400.dylib' referenced by the typelib: dlopen(/usr/local/lib/libparquet-glib.400.dylib, 0x0009): dependent dylib '@rpath/libparquet.400.dylib' not found for '/usr/local/lib/libparquet-glib.400.dylib'. 
relative file paths not allowed '@rpath/libparquet.400.dylib' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/loader.rb:215:in `load_object_info' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/loader.rb:68:in `load_info' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/loader.rb:43:in `block in load' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/repository.rb:34:in `block (2 levels) in each' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/repository.rb:33:in `times' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/repository.rb:33:in `block in each' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/repository.rb:32:in `each' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/repository.rb:32:in `each' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection/loader.rb:42:in `load' + from /Library/Ruby/Gems/2.6.0/gems/gobject-introspection-3.4.3/lib/gobject-introspection.rb:44:in `load' + from /Users/karlkatzen/Documents/code/arrow-dev/arrow/c_glib/test/run-test.rb:60:in `<main>' +Traceback (most recent call last): + 17: from /Users/karlkatzen/Documents/code/arrow-dev/arrow/c_glib/test/run-test.rb:80:in `<main>' + 16: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/autorunner.rb:66:in `run' + 15: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/autorunner.rb:434:in `run' + 14: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/autorunner.rb:106:in `block in <class:AutoRunner>' + 13: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:38:in `collect' + 12: from 
/Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:136:in `add_load_path' + 11: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:43:in `block in collect' + 10: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:43:in `each' + 9: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:46:in `block (2 levels) in collect' + 8: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:85:in `collect_recursive' + 7: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:85:in `each' + 6: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:87:in `block in collect_recursive' + 5: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:112:in `collect_file' + 4: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:136:in `add_load_path' + 3: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:114:in `block in collect_file' + 2: from /Library/Ruby/Gems/2.6.0/gems/test-unit-3.4.0/lib/test/unit/collector/load.rb:114:in `require' + 1: from /Users/karlkatzen/Documents/code/arrow-dev/arrow/c_glib/test/test-extension-data-type.rb:18:in `<top (required)>' +/Users/karlkatzen/Documents/code/arrow-dev/arrow/c_glib/test/test-extension-data-type.rb:19:in `<class:TestExtensionDataType>': uninitialized constant Arrow::ExtensionArray (NameError) +``` + +You can't use `@rpath` in Arrow C++. To fix this error, you need to +build Arrow C++ with `-DARROW_INSTALL_NAME_RPATH=OFF`: + +```console +$ cmake -S cpp -B cpp.build -DARROW_INSTALL_NAME_RPATH=OFF ... 
+$ cmake --build cpp.build +$ sudo cmake --build cpp.build --target install +``` diff --git a/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.h b/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.h new file mode 100644 index 000000000..b3c7f2108 --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.h @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <arrow-cuda-glib/cuda.h> diff --git a/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp b/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp new file mode 100644 index 000000000..e79b43ae0 --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/arrow-cuda-glib.hpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.hpp> + +#include <arrow-cuda-glib/cuda.hpp> diff --git a/src/arrow/c_glib/arrow-cuda-glib/cuda.cpp b/src/arrow/c_glib/arrow-cuda-glib/cuda.cpp new file mode 100644 index 000000000..142cd6f38 --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/cuda.cpp @@ -0,0 +1,944 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/ipc-options.hpp> +#include <arrow-glib/output-stream.hpp> +#include <arrow-glib/readable.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow-cuda-glib/cuda.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: cuda + * @section_id: cuda-classes + * @title: CUDA related classes + * @include: arrow-cuda-glib/arrow-cuda-glib.h + * + * The following classes provide CUDA support for Apache Arrow data. + * + * #GArrowCUDADeviceManager is the starting point. You need at + * least one #GArrowCUDAContext to process Apache Arrow data on + * NVIDIA GPU. + * + * #GArrowCUDAContext is a class to keep context for one GPU. You + * need to create #GArrowCUDAContext for each GPU that you want to + * use. You can create #GArrowCUDAContext by + * garrow_cuda_device_manager_get_context(). + * + * #GArrowCUDABuffer is a class for data on GPU. You can copy data + * on GPU to/from CPU by garrow_cuda_buffer_copy_to_host() and + * garrow_cuda_buffer_copy_from_host(). You can share data on GPU + * with other processes by garrow_cuda_buffer_export() and + * garrow_cuda_buffer_new_ipc(). + * + * #GArrowCUDAHostBuffer is a class for data on CPU that is + * directly accessible from GPU. + * + * #GArrowCUDAIPCMemoryHandle is a class to share data on GPU with + * other processes. You can export your data on GPU to other processes + * by garrow_cuda_buffer_export() and + * garrow_cuda_ipc_memory_handle_new(). You can import other + * process data on GPU by garrow_cuda_ipc_memory_handle_new() and + * garrow_cuda_buffer_new_ipc(). + * + * #GArrowCUDABufferInputStream is a class to read data in + * #GArrowCUDABuffer. + * + * #GArrowCUDABufferOutputStream is a class to write data into + * #GArrowCUDABuffer. 
+ */ + +G_DEFINE_TYPE(GArrowCUDADeviceManager, + garrow_cuda_device_manager, + G_TYPE_OBJECT) + +static void +garrow_cuda_device_manager_init(GArrowCUDADeviceManager *object) +{ +} + +static void +garrow_cuda_device_manager_class_init(GArrowCUDADeviceManagerClass *klass) +{ +} + +/** + * garrow_cuda_device_manager_new: + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCUDADeviceManager on success, + * %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDADeviceManager * +garrow_cuda_device_manager_new(GError **error) +{ + auto arrow_manager = arrow::cuda::CudaDeviceManager::Instance(); + if (garrow::check(error, arrow_manager, "[cuda][device-manager][new]")) { + auto manager = g_object_new(GARROW_CUDA_TYPE_DEVICE_MANAGER, + NULL); + return GARROW_CUDA_DEVICE_MANAGER(manager); + } else { + return NULL; + } +} + +/** + * garrow_cuda_device_manager_get_context: + * @manager: A #GArrowCUDADeviceManager. + * @gpu_number: A GPU device number for the target context. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDAContext on + * success, %NULL on error. Contexts for the same GPU device number + * share the same data internally. + * + * Since: 0.8.0 + */ +GArrowCUDAContext * +garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, + gint gpu_number, + GError **error) +{ + auto arrow_manager = arrow::cuda::CudaDeviceManager::Instance(); + auto arrow_cuda_context = (*arrow_manager)->GetContext(gpu_number); + if (garrow::check(error, arrow_cuda_context, + "[cuda][device-manager][get-context]]")) { + return garrow_cuda_context_new_raw(&(*arrow_cuda_context)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_device_manager_get_n_devices: + * @manager: A #GArrowCUDADeviceManager. + * + * Returns: The number of GPU devices. 
+ * + * Since: 0.8.0 + */ +gsize +garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager) +{ + auto arrow_manager = arrow::cuda::CudaDeviceManager::Instance(); + return (*arrow_manager)->num_devices(); +} + + +typedef struct GArrowCUDAContextPrivate_ { + std::shared_ptr<arrow::cuda::CudaContext> context; +} GArrowCUDAContextPrivate; + +enum { + PROP_CONTEXT = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCUDAContext, + garrow_cuda_context, + G_TYPE_OBJECT) + +#define GARROW_CUDA_CONTEXT_GET_PRIVATE(object) \ + static_cast<GArrowCUDAContextPrivate *>( \ + garrow_cuda_context_get_instance_private( \ + GARROW_CUDA_CONTEXT(object))) + +static void +garrow_cuda_context_finalize(GObject *object) +{ + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(object); + + priv->context.~shared_ptr(); + + G_OBJECT_CLASS(garrow_cuda_context_parent_class)->finalize(object); +} + +static void +garrow_cuda_context_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONTEXT: + priv->context = + *static_cast<std::shared_ptr<arrow::cuda::CudaContext> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_context_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_context_init(GArrowCUDAContext *object) +{ + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(object); + new(&priv->context) std::shared_ptr<arrow::cuda::CudaContext>; +} + +static void +garrow_cuda_context_class_init(GArrowCUDAContextClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_cuda_context_finalize; + gobject_class->set_property = 
garrow_cuda_context_set_property; + gobject_class->get_property = garrow_cuda_context_get_property; + + /** + * GArrowCUDAContext:context: + * + * Since: 0.8.0 + */ + spec = g_param_spec_pointer("context", + "Context", + "The raw std::shared_ptr<arrow::cuda::CudaContext>", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CONTEXT, spec); +} + +/** + * garrow_cuda_context_get_allocated_size: + * @context: A #GArrowCUDAContext. + * + * Returns: The allocated memory by this context in bytes. + * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + return arrow_context->bytes_allocated(); +} + + +G_DEFINE_TYPE(GArrowCUDABuffer, + garrow_cuda_buffer, + GARROW_TYPE_BUFFER) + +static void +garrow_cuda_buffer_init(GArrowCUDABuffer *object) +{ +} + +static void +garrow_cuda_buffer_class_init(GArrowCUDABufferClass *klass) +{ +} + +/** + * garrow_cuda_buffer_new: + * @context: A #GArrowCUDAContext. + * @size: The number of bytes to be allocated on GPU device for this context. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new(GArrowCUDAContext *context, + gint64 size, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + auto arrow_buffer = arrow_context->Allocate(size); + if (garrow::check(error, arrow_buffer, "[cuda][buffer][new]")) { + return garrow_cuda_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_new_ipc: + * @context: A #GArrowCUDAContext. + * @handle: A #GArrowCUDAIPCMemoryHandle to be communicated. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. The buffer has data from the IPC target. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, + GArrowCUDAIPCMemoryHandle *handle, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + auto arrow_handle = garrow_cuda_ipc_memory_handle_get_raw(handle); + auto arrow_buffer = arrow_context->OpenIpcBuffer(*arrow_handle); + if (garrow::check(error, arrow_buffer, "[cuda][buffer][new-ipc]")) { + return garrow_cuda_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_new_record_batch: + * @context: A #GArrowCUDAContext. + * @record_batch: A #GArrowRecordBatch to be serialized. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDABuffer on + * success, %NULL on error. The buffer has serialized record batch + * data. + * + * Since: 0.8.0 + */ +GArrowCUDABuffer * +garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, + GArrowRecordBatch *record_batch, + GError **error) +{ + auto arrow_context = garrow_cuda_context_get_raw(context); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_buffer = arrow::cuda::SerializeRecordBatch(*arrow_record_batch, + arrow_context.get()); + if (garrow::check(error, arrow_buffer, "[cuda][buffer][new-record-batch]")) { + return garrow_cuda_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_copy_to_host: + * @buffer: A #GArrowCUDABuffer. + * @position: The offset of memory on GPU device to be copied. + * @size: The size of memory on GPU device to be copied in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A #GBytes that have copied memory on CPU + * host on success, %NULL on error. 
+ * + * Since: 0.8.0 + */ +GBytes * +garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, + gint64 position, + gint64 size, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto data = static_cast<uint8_t *>(g_malloc(size)); + auto status = arrow_buffer->CopyToHost(position, size, data); + if (garrow_error_check(error, status, "[cuda][buffer][copy-to-host]")) { + return g_bytes_new_take(data, size); + } else { + g_free(data); + return NULL; + } +} + +/** + * garrow_cuda_buffer_copy_from_host: + * @buffer: A #GArrowCUDABuffer. + * @data: (array length=size): Data on CPU host to be copied. + * @size: The size of data on CPU host to be copied in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, + const guint8 *data, + gint64 size, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto status = arrow_buffer->CopyFromHost(0, data, size); + return garrow_error_check(error, + status, + "[cuda][buffer][copy-from-host]"); +} + +/** + * garrow_cuda_buffer_export: + * @buffer: A #GArrowCUDABuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created + * #GArrowCUDAIPCMemoryHandle to handle the exported buffer on + * success, %NULL on error + * + * Since: 0.8.0 + */ +GArrowCUDAIPCMemoryHandle * +garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_handle = arrow_buffer->ExportForIpc(); + if (garrow::check(error, arrow_handle, "[cuda][buffer][export-for-ipc]")) { + return garrow_cuda_ipc_memory_handle_new_raw(&(*arrow_handle)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_buffer_get_context: + * @buffer: A #GArrowCUDABuffer. 
+ * + * Returns: (transfer full): A newly created #GArrowCUDAContext for the + * buffer. Contexts for the same buffer share the same data internally. + * + * Since: 0.8.0 + */ +GArrowCUDAContext * +garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_context = arrow_buffer->context(); + return garrow_cuda_context_new_raw(&arrow_context); +} + +/** + * garrow_cuda_buffer_read_record_batch: + * @buffer: A #GArrowCUDABuffer. + * @schema: A #GArrowSchema for record batch. + * @options: (nullable): A #GArrowReadOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowRecordBatch on + * success, %NULL on error. The record batch data is located on GPU. + * + * Since: 0.8.0 + */ +GArrowRecordBatch * +garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, + GArrowSchema *schema, + GArrowReadOptions *options, + GError **error) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_schema = garrow_schema_get_raw(schema); + + if (options) { + auto arrow_options = garrow_read_options_get_raw(options); + auto arrow_dictionary_memo = + garrow_read_options_get_dictionary_memo_raw(options); + auto arrow_record_batch = + arrow::cuda::ReadRecordBatch(arrow_schema, + arrow_dictionary_memo, + arrow_buffer, + arrow_options->memory_pool); + if (garrow::check(error, arrow_record_batch, + "[cuda][buffer][read-record-batch]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch)); + } else { + return NULL; + } + } else { + auto arrow_pool = arrow::default_memory_pool(); + auto arrow_record_batch = + arrow::cuda::ReadRecordBatch(arrow_schema, + nullptr, + arrow_buffer, + arrow_pool); + if (garrow::check(error, arrow_record_batch, + "[cuda][buffer][read-record-batch]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch)); + } else { + return NULL; + } + } +} + + +G_DEFINE_TYPE(GArrowCUDAHostBuffer, 
+ garrow_cuda_host_buffer, + GARROW_TYPE_MUTABLE_BUFFER) + +static void +garrow_cuda_host_buffer_init(GArrowCUDAHostBuffer *object) +{ +} + +static void +garrow_cuda_host_buffer_class_init(GArrowCUDAHostBufferClass *klass) +{ +} + +/** + * garrow_cuda_host_buffer_new: + * @gpu_number: A GPU device number for the target context. + * @size: The number of bytes to be allocated on CPU host. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCUDAHostBuffer on success, + * %NULL on error. The allocated memory is accessible from the GPU + * device identified by @gpu_number. + * + * Since: 0.8.0 + */ +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error) +{ + auto arrow_manager = arrow::cuda::CudaDeviceManager::Instance(); + auto arrow_buffer = (*arrow_manager)->AllocateHost(gpu_number, size); + if (garrow::check(error, arrow_buffer, "[cuda][host-buffer][new]")) { + return garrow_cuda_host_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } +} + + +typedef struct GArrowCUDAIPCMemoryHandlePrivate_ { + std::shared_ptr<arrow::cuda::CudaIpcMemHandle> ipc_memory_handle; +} GArrowCUDAIPCMemoryHandlePrivate; + +enum { + PROP_IPC_MEMORY_HANDLE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCUDAIPCMemoryHandle, + garrow_cuda_ipc_memory_handle, + G_TYPE_OBJECT) + +#define GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object) \ + static_cast<GArrowCUDAIPCMemoryHandlePrivate *>( \ + garrow_cuda_ipc_memory_handle_get_instance_private( \ + GARROW_CUDA_IPC_MEMORY_HANDLE(object))) + +static void +garrow_cuda_ipc_memory_handle_finalize(GObject *object) +{ + auto priv = GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); + + priv->ipc_memory_handle = nullptr; + + G_OBJECT_CLASS(garrow_cuda_ipc_memory_handle_parent_class)->finalize(object); +} + +static void +garrow_cuda_ipc_memory_handle_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv =
GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_IPC_MEMORY_HANDLE: + priv->ipc_memory_handle = + *static_cast<std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_ipc_memory_handle_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cuda_ipc_memory_handle_init(GArrowCUDAIPCMemoryHandle *object) +{ +} + +static void +garrow_cuda_ipc_memory_handle_class_init(GArrowCUDAIPCMemoryHandleClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_cuda_ipc_memory_handle_finalize; + gobject_class->set_property = garrow_cuda_ipc_memory_handle_set_property; + gobject_class->get_property = garrow_cuda_ipc_memory_handle_get_property; + + /** + * GArrowCUDAIPCMemoryHandle:ipc-memory-handle: + * + * Since: 0.8.0 + */ + spec = g_param_spec_pointer("ipc-memory-handle", + "IPC Memory Handle", + "The raw std::shared_ptr<arrow::cuda::CudaIpcMemHandle>", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_IPC_MEMORY_HANDLE, spec); +} + +/** + * garrow_cuda_ipc_memory_handle_new: + * @data: (array length=size): A serialized #GArrowCUDAIPCMemoryHandle. + * @size: The size of data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowCUDAIPCMemoryHandle + * on success, %NULL on error. 
+ * + * Since: 0.8.0 + */ +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new(const guint8 *data, + gsize size, + GError **error) +{ + auto arrow_handle = arrow::cuda::CudaIpcMemHandle::FromBuffer(data); + if (garrow::check(error, arrow_handle, "[cuda][ipc-memory-handle][new]")) { + return garrow_cuda_ipc_memory_handle_new_raw(&(*arrow_handle)); + } else { + return NULL; + } +} + +/** + * garrow_cuda_ipc_memory_handle_serialize: + * @handle: A #GArrowCUDAIPCMemoryHandle. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly created #GArrowBuffer on success, + * %NULL on error. The buffer has serialized @handle. The serialized + * @handle can be deserialized by garrow_cuda_ipc_memory_handle_new() + * in another process. + * + * Since: 0.8.0 + */ +GArrowBuffer * +garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, + GError **error) +{ + auto arrow_handle = garrow_cuda_ipc_memory_handle_get_raw(handle); + auto arrow_buffer = arrow_handle->Serialize(arrow::default_memory_pool()); + if (garrow::check(error, arrow_buffer, + "[cuda][ipc-memory-handle][serialize]")) { + return garrow_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } +} + +static GArrowBuffer * +garrow_cuda_buffer_input_stream_buffer_new_raw_readable_interface(std::shared_ptr<arrow::Buffer> *arrow_buffer) +{ + auto arrow_cuda_buffer = + reinterpret_cast<std::shared_ptr<arrow::cuda::CudaBuffer> *>(arrow_buffer); + auto cuda_buffer = garrow_cuda_buffer_new_raw(arrow_cuda_buffer); + return GARROW_BUFFER(cuda_buffer); +} + +static std::shared_ptr<arrow::io::Readable> +garrow_cuda_buffer_input_stream_get_raw_readable_interface(GArrowReadable *readable) +{ + auto input_stream = GARROW_INPUT_STREAM(readable); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + +static void +garrow_cuda_buffer_input_stream_readable_interface_init(GArrowReadableInterface *iface) +{ +
iface->buffer_new_raw = + garrow_cuda_buffer_input_stream_buffer_new_raw_readable_interface; + iface->get_raw = + garrow_cuda_buffer_input_stream_get_raw_readable_interface; +} + +G_DEFINE_TYPE_WITH_CODE( + GArrowCUDABufferInputStream, + garrow_cuda_buffer_input_stream, + GARROW_TYPE_BUFFER_INPUT_STREAM, + G_IMPLEMENT_INTERFACE( + GARROW_TYPE_READABLE, + garrow_cuda_buffer_input_stream_readable_interface_init)) + +static void +garrow_cuda_buffer_input_stream_init(GArrowCUDABufferInputStream *object) +{ +} + +static void +garrow_cuda_buffer_input_stream_class_init(GArrowCUDABufferInputStreamClass *klass) +{ +} + +/** + * garrow_cuda_buffer_input_stream_new: + * @buffer: A #GArrowCUDABuffer. + * + * Returns: (transfer full): A newly created + * #GArrowCUDABufferInputStream. + * + * Since: 0.8.0 + */ +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_reader = + std::make_shared<arrow::cuda::CudaBufferReader>(arrow_buffer); + return garrow_cuda_buffer_input_stream_new_raw(&arrow_reader); +} + + +G_DEFINE_TYPE(GArrowCUDABufferOutputStream, + garrow_cuda_buffer_output_stream, + GARROW_TYPE_OUTPUT_STREAM) + +static void +garrow_cuda_buffer_output_stream_init(GArrowCUDABufferOutputStream *object) +{ +} + +static void +garrow_cuda_buffer_output_stream_class_init(GArrowCUDABufferOutputStreamClass *klass) +{ +} + +/** + * garrow_cuda_buffer_output_stream_new: + * @buffer: A #GArrowCUDABuffer. + * + * Returns: (transfer full): A newly created + * #GArrowCUDABufferOutputStream. 
+ * + * Since: 0.8.0 + */ +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer) +{ + auto arrow_buffer = garrow_cuda_buffer_get_raw(buffer); + auto arrow_writer = + std::make_shared<arrow::cuda::CudaBufferWriter>(arrow_buffer); + return garrow_cuda_buffer_output_stream_new_raw(&arrow_writer); +} + +/** + * garrow_cuda_buffer_output_stream_set_buffer_size: + * @stream: A #GArrowCUDABufferOutputStream. + * @size: A size of CPU buffer in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Sets CPU buffer size to limit `cudaMemcpy()` calls. If CPU buffer + * size is `0`, buffering is disabled. + * + * The default is `0`. + * + * Since: 0.8.0 + */ +gboolean +garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, + gint64 size, + GError **error) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + auto status = arrow_stream->SetBufferSize(size); + return garrow_error_check(error, + status, + "[cuda][buffer-output-stream][set-buffer-size]"); +} + +/** + * garrow_cuda_buffer_output_stream_get_buffer_size: + * @stream: A #GArrowCUDABufferOutputStream. + * + * Returns: The CPU buffer size in bytes. + * + * See garrow_cuda_buffer_output_stream_set_buffer_size() for CPU + * buffer size details. + * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + return arrow_stream->buffer_size(); +} + +/** + * garrow_cuda_buffer_output_stream_get_buffered_size: + * @stream: A #GArrowCUDABufferOutputStream. + * + * Returns: The size of buffered data in bytes.
+ * + * Since: 0.8.0 + */ +gint64 +garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream) +{ + auto arrow_stream = garrow_cuda_buffer_output_stream_get_raw(stream); + return arrow_stream->num_bytes_buffered(); +} + + +G_END_DECLS + +GArrowCUDAContext * +garrow_cuda_context_new_raw(std::shared_ptr<arrow::cuda::CudaContext> *arrow_context) +{ + return GARROW_CUDA_CONTEXT(g_object_new(GARROW_CUDA_TYPE_CONTEXT, + "context", arrow_context, + NULL)); +} + +std::shared_ptr<arrow::cuda::CudaContext> +garrow_cuda_context_get_raw(GArrowCUDAContext *context) +{ + if (!context) + return nullptr; + + auto priv = GARROW_CUDA_CONTEXT_GET_PRIVATE(context); + return priv->context; +} + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new_raw(std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *arrow_handle) +{ + auto handle = g_object_new(GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE, + "ipc-memory-handle", arrow_handle, + NULL); + return GARROW_CUDA_IPC_MEMORY_HANDLE(handle); +} + +std::shared_ptr<arrow::cuda::CudaIpcMemHandle> +garrow_cuda_ipc_memory_handle_get_raw(GArrowCUDAIPCMemoryHandle *handle) +{ + if (!handle) + return nullptr; + + auto priv = GARROW_CUDA_IPC_MEMORY_HANDLE_GET_PRIVATE(handle); + return priv->ipc_memory_handle; +} + +GArrowCUDABuffer * +garrow_cuda_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaBuffer> *arrow_buffer) +{ + return GARROW_CUDA_BUFFER(g_object_new(GARROW_CUDA_TYPE_BUFFER, + "buffer", arrow_buffer, + NULL)); +} + +std::shared_ptr<arrow::cuda::CudaBuffer> +garrow_cuda_buffer_get_raw(GArrowCUDABuffer *buffer) +{ + if (!buffer) + return nullptr; + + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + return std::static_pointer_cast<arrow::cuda::CudaBuffer>(arrow_buffer); +} + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaHostBuffer> *arrow_buffer) +{ + auto buffer = g_object_new(GARROW_CUDA_TYPE_HOST_BUFFER, + "buffer", arrow_buffer, + NULL); + return 
GARROW_CUDA_HOST_BUFFER(buffer); +} + +std::shared_ptr<arrow::cuda::CudaHostBuffer> +garrow_cuda_host_buffer_get_raw(GArrowCUDAHostBuffer *buffer) +{ + if (!buffer) + return nullptr; + + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + return std::static_pointer_cast<arrow::cuda::CudaHostBuffer>(arrow_buffer); +} + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferReader> *arrow_reader) +{ + auto input_stream = g_object_new(GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM, + "input-stream", arrow_reader, + NULL); + return GARROW_CUDA_BUFFER_INPUT_STREAM(input_stream); +} + +std::shared_ptr<arrow::cuda::CudaBufferReader> +garrow_cuda_buffer_input_stream_get_raw(GArrowCUDABufferInputStream *input_stream) +{ + if (!input_stream) + return nullptr; + + auto arrow_reader = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(input_stream)); + return std::static_pointer_cast<arrow::cuda::CudaBufferReader>(arrow_reader); +} + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferWriter> *arrow_writer) +{ + auto output_stream = g_object_new(GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM, + "output-stream", arrow_writer, + NULL); + return GARROW_CUDA_BUFFER_OUTPUT_STREAM(output_stream); +} + +std::shared_ptr<arrow::cuda::CudaBufferWriter> +garrow_cuda_buffer_output_stream_get_raw(GArrowCUDABufferOutputStream *output_stream) +{ + if (!output_stream) + return nullptr; + + auto arrow_writer = + garrow_output_stream_get_raw(GARROW_OUTPUT_STREAM(output_stream)); + return std::static_pointer_cast<arrow::cuda::CudaBufferWriter>(arrow_writer); +} diff --git a/src/arrow/c_glib/arrow-cuda-glib/cuda.h b/src/arrow/c_glib/arrow-cuda-glib/cuda.h new file mode 100644 index 000000000..3c98daeaf --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/cuda.h @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +#define GARROW_CUDA_TYPE_DEVICE_MANAGER (garrow_cuda_device_manager_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDADeviceManager, + garrow_cuda_device_manager, + GARROW_CUDA, + DEVICE_MANAGER, + GObject) +struct _GArrowCUDADeviceManagerClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_CONTEXT (garrow_cuda_context_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAContext, + garrow_cuda_context, + GARROW_CUDA, + CONTEXT, + GObject) +struct _GArrowCUDAContextClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER (garrow_cuda_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABuffer, + garrow_cuda_buffer, + GARROW_CUDA, + BUFFER, + GArrowBuffer) +struct _GArrowCUDABufferClass +{ + GArrowBufferClass parent_class; +}; + +#define GARROW_CUDA_TYPE_HOST_BUFFER (garrow_cuda_host_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAHostBuffer, + garrow_cuda_host_buffer, + GARROW_CUDA, + HOST_BUFFER, + GArrowMutableBuffer) +struct _GArrowCUDAHostBufferClass +{ + GArrowMutableBufferClass parent_class; +}; + +#define GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE \ + (garrow_cuda_ipc_memory_handle_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDAIPCMemoryHandle, + 
garrow_cuda_ipc_memory_handle, + GARROW_CUDA, + IPC_MEMORY_HANDLE, + GObject) +struct _GArrowCUDAIPCMemoryHandleClass +{ + GObjectClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM \ + (garrow_cuda_buffer_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferInputStream, + garrow_cuda_buffer_input_stream, + GARROW_CUDA, + BUFFER_INPUT_STREAM, + GArrowBufferInputStream) +struct _GArrowCUDABufferInputStreamClass +{ + GArrowBufferInputStreamClass parent_class; +}; + +#define GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM \ + (garrow_cuda_buffer_output_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferOutputStream, + garrow_cuda_buffer_output_stream, + GARROW_CUDA, + BUFFER_OUTPUT_STREAM, + GArrowOutputStream) +struct _GArrowCUDABufferOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; + +GArrowCUDADeviceManager * +garrow_cuda_device_manager_new(GError **error); + +GArrowCUDAContext * +garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, + gint gpu_number, + GError **error); +gsize +garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager); + +gint64 +garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context); + + +GArrowCUDABuffer * +garrow_cuda_buffer_new(GArrowCUDAContext *context, + gint64 size, + GError **error); +GArrowCUDABuffer * +garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, + GArrowCUDAIPCMemoryHandle *handle, + GError **error); +GArrowCUDABuffer * +garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, + GArrowRecordBatch *record_batch, + GError **error); +GBytes * +garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, + gint64 position, + gint64 size, + GError **error); +gboolean +garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, + const guint8 *data, + gint64 size, + GError **error); +GArrowCUDAIPCMemoryHandle * +garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, + GError **error); +GArrowCUDAContext * 
+garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer); +GArrowRecordBatch * +garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, + GArrowSchema *schema, + GArrowReadOptions *options, + GError **error); + + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new(gint gpu_number, + gint64 size, + GError **error); + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new(const guint8 *data, + gsize size, + GError **error); + +GArrowBuffer * +garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, + GError **error); + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer); + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer); + +gboolean +garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, + gint64 size, + GError **error); +gint64 +garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream); +gint64 +garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-cuda-glib/cuda.hpp b/src/arrow/c_glib/arrow-cuda-glib/cuda.hpp new file mode 100644 index 000000000..0f8985a9d --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/cuda.hpp @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/gpu/cuda_api.h> + +#include <arrow-cuda-glib/cuda.h> + +GArrowCUDAContext * +garrow_cuda_context_new_raw(std::shared_ptr<arrow::cuda::CudaContext> *arrow_context); +std::shared_ptr<arrow::cuda::CudaContext> +garrow_cuda_context_get_raw(GArrowCUDAContext *context); + +GArrowCUDAIPCMemoryHandle * +garrow_cuda_ipc_memory_handle_new_raw(std::shared_ptr<arrow::cuda::CudaIpcMemHandle> *arrow_handle); +std::shared_ptr<arrow::cuda::CudaIpcMemHandle> +garrow_cuda_ipc_memory_handle_get_raw(GArrowCUDAIPCMemoryHandle *handle); + +GArrowCUDABuffer * +garrow_cuda_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaBuffer> *arrow_buffer); +std::shared_ptr<arrow::cuda::CudaBuffer> +garrow_cuda_buffer_get_raw(GArrowCUDABuffer *buffer); + +GArrowCUDAHostBuffer * +garrow_cuda_host_buffer_new_raw(std::shared_ptr<arrow::cuda::CudaHostBuffer> *arrow_buffer); +std::shared_ptr<arrow::cuda::CudaHostBuffer> +garrow_cuda_host_buffer_get_raw(GArrowCUDAHostBuffer *buffer); + +GArrowCUDABufferInputStream * +garrow_cuda_buffer_input_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferReader> *arrow_reader); +std::shared_ptr<arrow::cuda::CudaBufferReader> +garrow_cuda_buffer_input_stream_get_raw(GArrowCUDABufferInputStream *input_stream); + +GArrowCUDABufferOutputStream * +garrow_cuda_buffer_output_stream_new_raw(std::shared_ptr<arrow::cuda::CudaBufferWriter> *arrow_writer); +std::shared_ptr<arrow::cuda::CudaBufferWriter> +garrow_cuda_buffer_output_stream_get_raw(GArrowCUDABufferOutputStream *output_stream); diff --git 
a/src/arrow/c_glib/arrow-cuda-glib/meson.build b/src/arrow/c_glib/arrow-cuda-glib/meson.build new file mode 100644 index 000000000..a655be084 --- /dev/null +++ b/src/arrow/c_glib/arrow-cuda-glib/meson.build @@ -0,0 +1,81 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +sources = files( + 'cuda.cpp', +) + +c_headers = files( + 'arrow-cuda-glib.h', + 'cuda.h', +) + +cpp_headers = files( + 'arrow-cuda-glib.hpp', + 'cuda.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: 'arrow-cuda-glib') + + +dependencies = [ + arrow_cuda, + arrow_glib, +] +libarrow_cuda_glib = library('arrow-cuda-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(libarrow_cuda_glib, + filebase: 'arrow-cuda-glib', + name: 'Apache Arrow CUDA GLib', + description: 'C API for Apache Arrow CUDA based on GLib', + version: version, + requires: ['arrow-glib', 'arrow-cuda']) + +if have_gi + gir_dependencies = [ + declare_dependency(sources: arrow_glib_gir), + ] + gir_extra_args = [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ] + arrow_cuda_glib_gir = gnome.generate_gir(libarrow_cuda_glib, + dependencies: gir_dependencies, + sources: sources + c_headers, + namespace: 'ArrowCUDA', + nsversion: api_version, + identifier_prefix: 'GArrowCUDA', + symbol_prefix: 'garrow_cuda', + export_packages: 'arrow-cuda-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: gir_extra_args) +endif diff --git a/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h new file mode 100644 index 000000000..58f4e216c --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.h @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <arrow-dataset-glib/dataset-factory.h> +#include <arrow-dataset-glib/dataset.h> +#include <arrow-dataset-glib/enums.h> +#include <arrow-dataset-glib/file-format.h> +#include <arrow-dataset-glib/fragment.h> +#include <arrow-dataset-glib/partitioning.h> +#include <arrow-dataset-glib/scanner.h> diff --git a/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp new file mode 100644 index 000000000..8e9965068 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.hpp> + +#include <arrow-dataset-glib/dataset-factory.hpp> +#include <arrow-dataset-glib/dataset.hpp> +#include <arrow-dataset-glib/file-format.hpp> +#include <arrow-dataset-glib/fragment.hpp> +#include <arrow-dataset-glib/partitioning.hpp> +#include <arrow-dataset-glib/scanner.hpp> diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp new file mode 100644 index 000000000..1e532760a --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.cpp @@ -0,0 +1,552 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/error.hpp> +#include <arrow-glib/file-system.hpp> + +#include <arrow-dataset-glib/dataset-factory.hpp> +#include <arrow-dataset-glib/dataset.hpp> +#include <arrow-dataset-glib/file-format.hpp> +#include <arrow-dataset-glib/partitioning.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: dataset-factory + * @section_id: dataset-factory + * @title: Dataset factory related classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetDatasetFactory is a base class for dataset factories. + * + * #GADatasetFileSystemDatasetFactory is a class for + * #GADatasetFileSystemDataset factory. + * + * Since: 5.0.0 + */ + +typedef struct GADatasetDatasetFactoryPrivate_ { + std::shared_ptr<arrow::dataset::DatasetFactory> factory; +} GADatasetDatasetFactoryPrivate; + +enum { + PROP_DATASET_FACTORY = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDatasetFactory, + gadataset_dataset_factory, + G_TYPE_OBJECT) + +#define GADATASET_DATASET_FACTORY_GET_PRIVATE(obj) \ + static_cast<GADatasetDatasetFactoryPrivate *>( \ + gadataset_dataset_factory_get_instance_private( \ + GADATASET_DATASET_FACTORY(obj))) + +static void +gadataset_dataset_factory_finalize(GObject *object) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + priv->factory.~shared_ptr(); + G_OBJECT_CLASS(gadataset_dataset_factory_parent_class)->finalize(object); +} + +static void +gadataset_dataset_factory_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATASET_FACTORY: + { + auto arrow_factory_pointer = + static_cast<std::shared_ptr<arrow::dataset::DatasetFactory> *>( + g_value_get_pointer(value)); + if (arrow_factory_pointer) { + priv->factory = *arrow_factory_pointer; + } + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void 
+gadataset_dataset_factory_init(GADatasetDatasetFactory *object) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object); + new(&priv->factory) std::shared_ptr<arrow::dataset::DatasetFactory>; +} + +static void +gadataset_dataset_factory_class_init(GADatasetDatasetFactoryClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_dataset_factory_finalize; + gobject_class->set_property = gadataset_dataset_factory_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("dataset-factory", + "Dataset factory", + "The raw " + "std::shared<arrow::dataset::DatasetFactory> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATASET_FACTORY, spec); +} + +/** + * gadataset_dataset_factory_finish: + * @factory: A #GADatasetDatasetFactory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetDataset on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GADatasetDataset * +gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, + GError **error) +{ + auto arrow_factory = gadataset_dataset_factory_get_raw(factory); + auto arrow_dataset_result = arrow_factory->Finish(); + if (garrow::check(error, arrow_dataset_result, "[dataset-factory][finish]")) { + auto arrow_dataset = *arrow_dataset_result; + return gadataset_dataset_new_raw(&arrow_dataset); + } else { + return NULL; + } +} + + +typedef struct GADatasetFileSystemDatasetFactoryPrivate_ { + GADatasetFileFormat *format; + GArrowFileSystem *file_system; + GADatasetPartitioning *partitioning; + GList *files; + arrow::dataset::FileSystemFactoryOptions options; +} GADatasetFileSystemDatasetFactoryPrivate; + +enum { + PROP_FORMAT = 1, + PROP_FILE_SYSTEM, + PROP_PARTITIONING, + PROP_PARTITION_BASE_DIR, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetFactory, + gadataset_file_system_dataset_factory, + GADATASET_TYPE_DATASET_FACTORY) + +#define GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(obj) \ + static_cast<GADatasetFileSystemDatasetFactoryPrivate *>( \ + gadataset_file_system_dataset_factory_get_instance_private( \ + GADATASET_FILE_SYSTEM_DATASET_FACTORY(obj))) + +static void +gadataset_file_system_dataset_factory_dispose(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + if (priv->format) { + g_object_unref(priv->format); + priv->format = NULL; + } + + if (priv->file_system) { + g_object_unref(priv->file_system); + priv->file_system = NULL; + } + + if (priv->partitioning) { + g_object_unref(priv->partitioning); + priv->partitioning = NULL; + } + + if (priv->files) { + g_list_free_full(priv->files, g_object_unref); + priv->files = NULL; + } + + G_OBJECT_CLASS( + gadataset_file_system_dataset_factory_parent_class)->dispose(object); +} + +static void +gadataset_file_system_dataset_factory_finalize(GObject *object) +{ + auto priv = 
GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + priv->options.~FileSystemFactoryOptions(); + G_OBJECT_CLASS( + gadataset_file_system_dataset_factory_parent_class)->finalize(object); +} + +static void +gadataset_file_system_dataset_factory_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value)); + break; + case PROP_PARTITIONING: + { + auto partitioning = g_value_get_object(value); + if (partitioning == priv->partitioning) { + break; + } + auto old_partitioning = priv->partitioning; + if (partitioning) { + g_object_ref(partitioning); + priv->partitioning = GADATASET_PARTITIONING(partitioning); + priv->options.partitioning = + gadataset_partitioning_get_raw(priv->partitioning); + } else { + priv->options.partitioning = arrow::dataset::Partitioning::Default(); + } + if (old_partitioning) { + g_object_unref(old_partitioning); + } + } + break; + case PROP_PARTITION_BASE_DIR: + priv->options.partition_base_dir = g_value_get_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_factory_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + g_value_set_object(value, priv->format); + break; + case PROP_FILE_SYSTEM: + g_value_set_object(value, priv->file_system); + break; + case PROP_PARTITIONING: + g_value_set_object(value, priv->partitioning); + break; + case PROP_PARTITION_BASE_DIR: + g_value_set_string(value, priv->options.partition_base_dir.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void 
+gadataset_file_system_dataset_factory_init( + GADatasetFileSystemDatasetFactory *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object); + new(&priv->options) arrow::dataset::FileSystemFactoryOptions; +} + +static void +gadataset_file_system_dataset_factory_class_init( + GADatasetFileSystemDatasetFactoryClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = gadataset_file_system_dataset_factory_dispose; + gobject_class->finalize = gadataset_file_system_dataset_factory_finalize; + gobject_class->set_property = gadataset_file_system_dataset_factory_set_property; + gobject_class->get_property = gadataset_file_system_dataset_factory_get_property; + + GParamSpec *spec; + /** + * GADatasetFileSystemDatasetFactory:format: + * + * Format passed to #GADatasetFileSystemDataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("format", + "Format", + "Format passed to GADatasetFileSystemDataset", + GADATASET_TYPE_FILE_FORMAT, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FORMAT, spec); + + /** + * GADatasetFileSystemDatasetFactory:file-system: + * + * File system passed to #GADatasetFileSystemDataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("file-system", + "File system", + "File system passed to GADatasetFileSystemDataset", + GARROW_TYPE_FILE_SYSTEM, + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec); + + /** + * GADatasetFileSystemDatasetFactory:partitioning: + * + * Partitioning used by #GADatasetFileSystemDataset. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("partitioning", + "Partitioning", + "Partitioning used by GADatasetFileSystemDataset", + GADATASET_TYPE_PARTITIONING, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec); + + /** + * GADatasetFileSystemDatasetFactory:partition-base-dir: + * + * Partition base directory used by #GADatasetFileSystemDataset. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("partition-base-dir", + "Partition base directory", + "Partition base directory " + "used by GADatasetFileSystemDataset", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PARTITION_BASE_DIR, spec); +} + +/** + * gadataset_file_system_dataset_factory_new: + * @format: A #GADatasetFileFormat. It is passed as the construct-only + * "format" property of the new factory. + * + * Returns: A newly created #GADatasetFileSystemDatasetFactory. + * + * Since: 5.0.0 + */ +GADatasetFileSystemDatasetFactory * +gadataset_file_system_dataset_factory_new(GADatasetFileFormat *format) +{ + return GADATASET_FILE_SYSTEM_DATASET_FACTORY( + g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY, + "format", format, + NULL)); +} + +/** + * gadataset_file_system_dataset_factory_set_file_system: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @file_system: A #GArrowFileSystem. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. 
+ * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_set_file_system( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileSystem *file_system, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][set-file-system]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system is already set"), + context); + return FALSE; + } + priv->file_system = file_system; + g_object_ref(priv->file_system); + return TRUE; +} + +/** + * gadataset_file_system_dataset_factory_set_file_system_uri: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @uri: An URI for file system. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_set_file_system_uri( + GADatasetFileSystemDatasetFactory *factory, + const gchar *uri, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][set-file-system-uri]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system is already set"), + context); + return FALSE; + } + std::string internal_path; + auto arrow_file_system_result = + arrow::fs::FileSystemFromUri(uri, &internal_path); + if (!garrow::check(error, arrow_file_system_result, context)) { + return FALSE; + } + auto arrow_file_system = *arrow_file_system_result; + auto arrow_file_info_result = arrow_file_system->GetFileInfo(internal_path); + if (!garrow::check(error, arrow_file_info_result, context)) { + return FALSE; + } + priv->file_system = garrow_file_system_new_raw(&arrow_file_system); + auto file_info = garrow_file_info_new_raw(*arrow_file_info_result); + priv->files = g_list_prepend(priv->files, file_info); + return TRUE; +} + +/** + * 
gadataset_file_system_dataset_factory_add_path: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @path: A path to be added. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gadataset_file_system_dataset_factory_add_path( + GADatasetFileSystemDatasetFactory *factory, + const gchar *path, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][add-path]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (!priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system isn't set"), + context); + return FALSE; + } + auto arrow_file_system = garrow_file_system_get_raw(priv->file_system); + auto arrow_file_info_result = arrow_file_system->GetFileInfo(path); + if (!garrow::check(error, arrow_file_info_result, context)) { + return FALSE; + } + auto file_info = garrow_file_info_new_raw(*arrow_file_info_result); + priv->files = g_list_prepend(priv->files, file_info); + return TRUE; +} + +/** + * gadataset_file_system_dataset_factory_finish: + * @factory: A #GADatasetFileSystemDatasetFactory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetFileSystemDataset on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GADatasetFileSystemDataset * +gadataset_file_system_dataset_factory_finish( + GADatasetFileSystemDatasetFactory *factory, + GError **error) +{ + const gchar *context = "[file-system-dataset-factory][finish]"; + auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory); + if (!priv->file_system) { + garrow::check(error, + arrow::Status::Invalid("file system isn't set"), + context); + return NULL; + } + auto arrow_file_system = garrow_file_system_get_raw(priv->file_system); + auto arrow_format = gadataset_file_format_get_raw(priv->format); + arrow::Result<std::shared_ptr<arrow::dataset::DatasetFactory>> + arrow_factory_result; + if (priv->files && + !priv->files->next && + garrow_file_info_is_dir(GARROW_FILE_INFO(priv->files->data))) { + auto file = GARROW_FILE_INFO(priv->files->data); + arrow::fs::FileSelector arrow_selector; + arrow_selector.base_dir = garrow_file_info_get_raw(file)->path(); + arrow_selector.recursive = true; + arrow_factory_result = + arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system, + arrow_selector, + arrow_format, + priv->options); + } else { + std::vector<arrow::fs::FileInfo> arrow_files; + priv->files = g_list_reverse(priv->files); + for (auto node = priv->files; node; node = node->next) { + auto file = GARROW_FILE_INFO(node->data); + arrow_files.push_back(*garrow_file_info_get_raw(file)); + } + priv->files = g_list_reverse(priv->files); + arrow_factory_result = + arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system, + arrow_files, + arrow_format, + priv->options); + } + if (!garrow::check(error, arrow_factory_result, context)) { + return NULL; + } + auto arrow_dataset_result = (*arrow_factory_result)->Finish(); + if (!garrow::check(error, arrow_dataset_result, context)) { + return NULL; + } + auto arrow_dataset = *arrow_dataset_result; + return GADATASET_FILE_SYSTEM_DATASET( + gadataset_dataset_new_raw(&arrow_dataset, + "dataset", &arrow_dataset, + "file-system", 
priv->file_system, + "format", priv->format, + "partitioning", priv->partitioning, + NULL)); +} + + +G_END_DECLS + +std::shared_ptr<arrow::dataset::DatasetFactory> +gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory) +{ + auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(factory); + return priv->factory; +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h new file mode 100644 index 000000000..e2ee3ed98 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.h @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-dataset-glib/dataset.h> + +G_BEGIN_DECLS + +#define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetDatasetFactory, + gadataset_dataset_factory, + GADATASET, + DATASET_FACTORY, + GObject) +struct _GADatasetDatasetFactoryClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetDataset * +gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, + GError **error); + + +#define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \ + (gadataset_file_system_dataset_factory_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory, + gadataset_file_system_dataset_factory, + GADATASET, + FILE_SYSTEM_DATASET_FACTORY, + GADatasetDatasetFactory) +struct _GADatasetFileSystemDatasetFactoryClass +{ + GADatasetDatasetFactoryClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetFileSystemDatasetFactory * +gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format); +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_set_file_system( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileSystem *file_system, + GError **error); +gboolean +gadataset_file_system_dataset_factory_set_file_system_uri( + GADatasetFileSystemDatasetFactory *factory, + const gchar *uri, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_path( + GADatasetFileSystemDatasetFactory *factory, + const gchar *path, + GError **error); +/* +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_file( + GADatasetFileSystemDatasetFactory *factory, + GArrowFileInfo *file, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gboolean +gadataset_file_system_dataset_factory_add_selector( + GADatasetFileSystemDatasetFactory *factory, + GArrorFileSelector *selector, + GError **error); +*/ + +GARROW_AVAILABLE_IN_5_0 +GADatasetFileSystemDataset * 
+gadataset_file_system_dataset_factory_finish( + GADatasetFileSystemDatasetFactory *factory, + GError **error); + + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp new file mode 100644 index 000000000..114db35bc --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/dataset-factory.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/dataset-factory.h> + +std::shared_ptr<arrow::dataset::DatasetFactory> +gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory); diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp b/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp new file mode 100644 index 000000000..8613bedad --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.cpp @@ -0,0 +1,736 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/error.hpp> +#include <arrow-glib/file-system.hpp> +#include <arrow-glib/table.hpp> + +#include <arrow-dataset-glib/dataset-factory.hpp> +#include <arrow-dataset-glib/dataset.hpp> +#include <arrow-dataset-glib/file-format.hpp> +#include <arrow-dataset-glib/partitioning.hpp> +#include <arrow-dataset-glib/scanner.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: dataset + * @section_id: dataset + * @title: Dataset related classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetDataset is a base class for datasets. + * + * #GADatasetFileSystemDataset is a class for file system dataset. + * + * #GADatasetFileSystemDatasetWriteOptions is a class for options to + * write a dataset to file system dataset. 
+ * + * Since: 5.0.0 + */ + +typedef struct GADatasetDatasetPrivate_ { + std::shared_ptr<arrow::dataset::Dataset> dataset; +} GADatasetDatasetPrivate; + +enum { + PROP_DATASET = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDataset, + gadataset_dataset, + G_TYPE_OBJECT) + +#define GADATASET_DATASET_GET_PRIVATE(obj) \ + static_cast<GADatasetDatasetPrivate *>( \ + gadataset_dataset_get_instance_private( \ + GADATASET_DATASET(obj))) + +static void +gadataset_dataset_finalize(GObject *object) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + priv->dataset.~shared_ptr(); + G_OBJECT_CLASS(gadataset_dataset_parent_class)->finalize(object); +} + +static void +gadataset_dataset_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATASET: + priv->dataset = + *static_cast<std::shared_ptr<arrow::dataset::Dataset> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_dataset_init(GADatasetDataset *object) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(object); + new(&priv->dataset) std::shared_ptr<arrow::dataset::Dataset>; +} + +static void +gadataset_dataset_class_init(GADatasetDatasetClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_dataset_finalize; + gobject_class->set_property = gadataset_dataset_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("dataset", + "Dataset", + "The raw " + "std::shared<arrow::dataset::Dataset> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATASET, spec); +} + +/** + * gadataset_dataset_begin_scan: + * @dataset: A #GADatasetDataset. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetScannerBuilder on success, %NULL on error. + * + * Since: 5.0.0 + */ +GADatasetScannerBuilder * +gadataset_dataset_begin_scan(GADatasetDataset *dataset, + GError **error) +{ + return gadataset_scanner_builder_new(dataset, error); +} + +/** + * gadataset_dataset_to_table: + * @dataset: A #GADatasetDataset. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A loaded #GArrowTable on success, %NULL on error. + * + * Since: 5.0.0 + */ +GArrowTable * +gadataset_dataset_to_table(GADatasetDataset *dataset, + GError **error) +{ + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (!garrow::check(error, + arrow_scanner_builder_result, + "[dataset][to-table]")) { + return NULL; + } + auto arrow_scanner_builder = *arrow_scanner_builder_result; + auto arrow_scanner_result = arrow_scanner_builder->Finish(); + if (!garrow::check(error, + arrow_scanner_result, + "[dataset][to-table]")) { + return NULL; + } + auto arrow_scanner = *arrow_scanner_result; + auto arrow_table_result = arrow_scanner->ToTable(); + if (!garrow::check(error, + arrow_scanner_result, + "[dataset][to-table]")) { + return NULL; + } + return garrow_table_new_raw(&(*arrow_table_result)); +} + +/** + * gadataset_dataset_get_type_name: + * @dataset: A #GADatasetDataset. + * + * Returns: The type name of @dataset. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 5.0.0 + */ +gchar * +gadataset_dataset_get_type_name(GADatasetDataset *dataset) +{ + const auto arrow_dataset = gadataset_dataset_get_raw(dataset); + const auto &type_name = arrow_dataset->type_name(); + return g_strndup(type_name.data(), type_name.size()); +} + + +typedef struct GADatasetFileSystemDatasetWriteOptionsPrivate_ { + arrow::dataset::FileSystemDatasetWriteOptions options; + GADatasetFileWriteOptions *file_write_options; + GArrowFileSystem *file_system; + GADatasetPartitioning *partitioning; +} GADatasetFileSystemDatasetWriteOptionsPrivate; + +enum { + PROP_FILE_WRITE_OPTIONS = 1, + PROP_FILE_SYSTEM, + PROP_BASE_DIR, + PROP_PARTITIONING, + PROP_MAX_PARTITIONS, + PROP_BASE_NAME_TEMPLATE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetWriteOptions, + gadataset_file_system_dataset_write_options, + G_TYPE_OBJECT) + +#define GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GADatasetFileSystemDatasetWriteOptionsPrivate *>( \ + gadataset_file_system_dataset_write_options_get_instance_private( \ + GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS(obj))) + +static void +gadataset_file_system_dataset_write_options_finalize(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object); + priv->options.~FileSystemDatasetWriteOptions(); + G_OBJECT_CLASS(gadataset_file_system_dataset_write_options_parent_class)-> + finalize(object); +} + +static void +gadataset_file_system_dataset_write_options_dispose(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object); + + if (priv->file_write_options) { + g_object_unref(priv->file_write_options); + priv->file_write_options = NULL; + } + + if (priv->file_system) { + g_object_unref(priv->file_system); + priv->file_system = NULL; + } + + if (priv->partitioning) { + g_object_unref(priv->partitioning); + priv->partitioning = NULL; + } + + 
G_OBJECT_CLASS(gadataset_file_system_dataset_write_options_parent_class)-> + dispose(object); +} + +static void +gadataset_file_system_dataset_write_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_WRITE_OPTIONS: + { + auto file_write_options = g_value_get_object(value); + if (file_write_options == priv->file_write_options) { + break; + } + auto old_file_write_options = priv->file_write_options; + if (file_write_options) { + g_object_ref(file_write_options); + priv->file_write_options = + GADATASET_FILE_WRITE_OPTIONS(file_write_options); + priv->options.file_write_options = + gadataset_file_write_options_get_raw(priv->file_write_options); + } else { + priv->options.file_write_options = nullptr; + } + if (old_file_write_options) { + g_object_unref(old_file_write_options); + } + } + break; + case PROP_FILE_SYSTEM: + { + auto file_system = g_value_get_object(value); + if (file_system == priv->file_system) { + break; + } + auto old_file_system = priv->file_system; + if (file_system) { + g_object_ref(file_system); + priv->file_system = GARROW_FILE_SYSTEM(file_system); + priv->options.filesystem = garrow_file_system_get_raw(priv->file_system); + } else { + priv->options.filesystem = nullptr; + } + if (old_file_system) { + g_object_unref(old_file_system); + } + } + break; + case PROP_BASE_DIR: + priv->options.base_dir = g_value_get_string(value); + break; + case PROP_PARTITIONING: + { + auto partitioning = g_value_get_object(value); + if (partitioning == priv->partitioning) { + break; + } + auto old_partitioning = priv->partitioning; + if (partitioning) { + g_object_ref(partitioning); + priv->partitioning = GADATASET_PARTITIONING(partitioning); + priv->options.partitioning = + gadataset_partitioning_get_raw(priv->partitioning); + } else { + priv->options.partitioning = 
arrow::dataset::Partitioning::Default(); + } + if (old_partitioning) { + g_object_unref(old_partitioning); + } + } + break; + case PROP_MAX_PARTITIONS: + priv->options.max_partitions = g_value_get_uint(value); + break; + case PROP_BASE_NAME_TEMPLATE: + priv->options.basename_template = g_value_get_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_write_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_WRITE_OPTIONS: + g_value_set_object(value, priv->file_write_options); + break; + case PROP_FILE_SYSTEM: + g_value_set_object(value, priv->file_system); + break; + case PROP_BASE_DIR: + g_value_set_string(value, priv->options.base_dir.c_str()); + break; + case PROP_PARTITIONING: + g_value_set_object(value, priv->partitioning); + break; + case PROP_MAX_PARTITIONS: + g_value_set_uint(value, priv->options.max_partitions); + break; + case PROP_BASE_NAME_TEMPLATE: + g_value_set_string(value, priv->options.basename_template.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_write_options_init( + GADatasetFileSystemDatasetWriteOptions *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(object); + new(&(priv->options)) arrow::dataset::FileSystemDatasetWriteOptions; + priv->options.partitioning = arrow::dataset::Partitioning::Default(); +} + +static void +gadataset_file_system_dataset_write_options_class_init( + GADatasetFileSystemDatasetWriteOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = + gadataset_file_system_dataset_write_options_finalize; + gobject_class->dispose = + gadataset_file_system_dataset_write_options_dispose; + 
gobject_class->set_property = + gadataset_file_system_dataset_write_options_set_property; + gobject_class->get_property = + gadataset_file_system_dataset_write_options_get_property; + + arrow::dataset::FileSystemDatasetWriteOptions default_options; + GParamSpec *spec; + /** + * GADatasetFileSystemDatasetWriteOptions:file_write_options: + * + * Options for individual fragment writing. + * + * Since: 6.0.0 + */ + spec = g_param_spec_object("file-write-options", + "File write options", + "Options for individual fragment writing", + GADATASET_TYPE_FILE_WRITE_OPTIONS, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_WRITE_OPTIONS, spec); + + /** + * GADatasetFileSystemDatasetWriteOptions:file_system: + * + * #GArrowFileSystem into which a dataset will be written. + * + * Since: 6.0.0 + */ + spec = g_param_spec_object("file-system", + "File system", + "GArrowFileSystem into which " + "a dataset will be written", + GARROW_TYPE_FILE_SYSTEM, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec); + + /** + * GADatasetFileSystemDatasetWriteOptions:base_dir: + * + * Root directory into which the dataset will be written. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("base-dir", + "Base directory", + "Root directory into which " + "the dataset will be written", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_BASE_DIR, spec); + + /** + * GADatasetFileSystemDatasetWriteOptions:partitioning: + * + * #GADatasetPartitioning used to generate fragment paths. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("partitioning", + "Partitioning", + "GADatasetPartitioning used to " + "generate fragment paths", + GADATASET_TYPE_PARTITIONING, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec); + + /** + * GADatasetFileSystemDatasetWriteOptions:max-partitions: + * + * Maximum number of partitions any batch may be written into. + * + * Since: 6.0.0 + */ + spec = g_param_spec_uint("max-partitions", + "Max partitions", + "Maximum number of partitions " + "any batch may be written into", + 0, + G_MAXINT, + default_options.max_partitions, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MAX_PARTITIONS, spec); + + /** + * GADatasetFileSystemDatasetWriteOptions:base-name-template: + * + * Template string used to generate fragment base names. {i} will be + * replaced by an auto incremented integer. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("base-name-template", + "Base name template", + "Template string used to generate fragment " + "base names. {i} will be replaced by " + "an auto incremented integer", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_BASE_NAME_TEMPLATE, spec); +} + +/** + * gadataset_file_system_dataset_write_options_new: + * + * Returns: The newly created #GADatasetFileSystemDatasetWriteOptions. 
+ * + * Since: 6.0.0 + */ +GADatasetFileSystemDatasetWriteOptions * +gadataset_file_system_dataset_write_options_new(void) +{ + return GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS( + g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS, + NULL)); +} + + +typedef struct GADatasetFileSystemDatasetPrivate_ { + GADatasetFileFormat *format; + GArrowFileSystem *file_system; + GADatasetPartitioning *partitioning; +} GADatasetFileSystemDatasetPrivate; + +enum { + PROP_FILE_SYSTEM_DATASET_FORMAT = 1, + PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM, + PROP_FILE_SYSTEM_DATASET_PARTITIONING, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDataset, + gadataset_file_system_dataset, + GADATASET_TYPE_DATASET) + +#define GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(obj) \ + static_cast<GADatasetFileSystemDatasetPrivate *>( \ + gadataset_file_system_dataset_get_instance_private( \ + GADATASET_FILE_SYSTEM_DATASET(obj))) + +static void +gadataset_file_system_dataset_dispose(GObject *object) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + if (priv->format) { + g_object_unref(priv->format); + priv->format = NULL; + } + + if (priv->file_system) { + g_object_unref(priv->file_system); + priv->file_system = NULL; + } + + G_OBJECT_CLASS(gadataset_file_system_dataset_parent_class)->dispose(object); +} + +static void +gadataset_file_system_dataset_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_SYSTEM_DATASET_FORMAT: + priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value)); + break; + case PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM: + priv->file_system = GARROW_FILE_SYSTEM(g_value_dup_object(value)); + break; + case PROP_FILE_SYSTEM_DATASET_PARTITIONING: + priv->partitioning = GADATASET_PARTITIONING(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + 
} +} + +static void +gadataset_file_system_dataset_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_SYSTEM_DATASET_FORMAT: + g_value_set_object(value, priv->format); + break; + case PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM: + g_value_set_object(value, priv->file_system); + break; + case PROP_FILE_SYSTEM_DATASET_PARTITIONING: + g_value_set_object(value, priv->partitioning); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_system_dataset_init(GADatasetFileSystemDataset *object) +{ +} + +static void +gadataset_file_system_dataset_class_init(GADatasetFileSystemDatasetClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = gadataset_file_system_dataset_dispose; + gobject_class->set_property = gadataset_file_system_dataset_set_property; + gobject_class->get_property = gadataset_file_system_dataset_get_property; + + GParamSpec *spec; + /** + * GADatasetFileSystemDataset:format: + * + * Format of the dataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("format", + "Format", + "Format of the dataset", + GADATASET_TYPE_FILE_FORMAT, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_FILE_SYSTEM_DATASET_FORMAT, + spec); + + /** + * GADatasetFileSystemDataset:file-system: + * + * File system of the dataset. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("file-system", + "File system", + "File system of the dataset", + GARROW_TYPE_FILE_SYSTEM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_FILE_SYSTEM_DATASET_FILE_SYSTEM, + spec); + + /** + * GADatasetFileSystemDataset:partitioning: + * + * Partitioning of the dataset. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("partitioning", + "Partitioning", + "Partitioning of the dataset", + GADATASET_TYPE_PARTITIONING, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_FILE_SYSTEM_DATASET_PARTITIONING, + spec); +} + +/** + * gadataset_file_system_dataset_write_scanner: + * @scanner: A #GADatasetScanner that produces data to be written. + * @options: A #GADatasetFileSystemDatasetWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +gadataset_file_system_dataset_write_scanner( + GADatasetScanner *scanner, + GADatasetFileSystemDatasetWriteOptions *options, + GError **error) +{ + auto arrow_scanner = gadataset_scanner_get_raw(scanner); + auto arrow_options = + gadataset_file_system_dataset_write_options_get_raw(options); + auto status = + arrow::dataset::FileSystemDataset::Write(*arrow_options, arrow_scanner); + return garrow::check(error, + status, + "[file-system-dataset][write-scanner]"); +} + + +G_END_DECLS + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset) +{ + return gadataset_dataset_new_raw(arrow_dataset, + "dataset", arrow_dataset, + NULL); +} + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset, + const gchar *first_property_name, + ...) 
+{ + va_list args; + va_start(args, first_property_name); + auto array = gadataset_dataset_new_raw_valist(arrow_dataset, + first_property_name, + args); + va_end(args); + return array; +} + +GADatasetDataset * +gadataset_dataset_new_raw_valist( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset, + const gchar *first_property_name, + va_list args) +{ + GType type = GADATASET_TYPE_DATASET; + const auto type_name = (*arrow_dataset)->type_name(); + if (type_name == "filesystem") { + type = GADATASET_TYPE_FILE_SYSTEM_DATASET; + } + return GADATASET_DATASET(g_object_new_valist(type, + first_property_name, + args)); +} + +std::shared_ptr<arrow::dataset::Dataset> +gadataset_dataset_get_raw(GADatasetDataset *dataset) +{ + auto priv = GADATASET_DATASET_GET_PRIVATE(dataset); + return priv->dataset; +} + +arrow::dataset::FileSystemDatasetWriteOptions * +gadataset_file_system_dataset_write_options_get_raw( + GADatasetFileSystemDatasetWriteOptions *options) +{ + auto priv = GADATASET_FILE_SYSTEM_DATASET_WRITE_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.h b/src/arrow/c_glib/arrow-dataset-glib/dataset.h new file mode 100644 index 000000000..86d077caa --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.h @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/file-format.h>
+
+G_BEGIN_DECLS
+/* Forward declarations of types used only by pointer in this header. */
+typedef struct _GADatasetScannerBuilder GADatasetScannerBuilder;
+typedef struct _GADatasetScanner GADatasetScanner;
+/* GADatasetDataset: derivable type whose parent is GObject. */
+#define GADATASET_TYPE_DATASET (gadataset_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDataset,
+                         gadataset_dataset,
+                         GADATASET,
+                         DATASET,
+                         GObject)
+struct _GADatasetDatasetClass
+{
+  GObjectClass parent_class;
+};
+/* GADatasetDataset functions, all marked available since 5.0. */
+GARROW_AVAILABLE_IN_5_0
+GADatasetScannerBuilder *
+gadataset_dataset_begin_scan(GADatasetDataset *dataset,
+                             GError **error);
+GARROW_AVAILABLE_IN_5_0
+GArrowTable *
+gadataset_dataset_to_table(GADatasetDataset *dataset,
+                           GError **error);
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gadataset_dataset_get_type_name(GADatasetDataset *dataset);
+
+/* GADatasetFileSystemDatasetWriteOptions: derivable type whose parent is GObject. */
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \
+  (gadataset_file_system_dataset_write_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetWriteOptions,
+                         gadataset_file_system_dataset_write_options,
+                         GADATASET,
+                         FILE_SYSTEM_DATASET_WRITE_OPTIONS,
+                         GObject)
+struct _GADatasetFileSystemDatasetWriteOptionsClass
+{
+  GObjectClass parent_class;
+};
+/* Constructor, marked available since 6.0. */
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileSystemDatasetWriteOptions *
+gadataset_file_system_dataset_write_options_new(void);
+
+/* GADatasetFileSystemDataset: derivable type whose parent is GADatasetDataset. */
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET \
+  (gadataset_file_system_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset,
+                         gadataset_file_system_dataset,
+                         GADATASET,
+                         FILE_SYSTEM_DATASET,
+                         GADatasetDataset)
+struct _GADatasetFileSystemDatasetClass
+{
+  GADatasetDatasetClass parent_class;
+};
+/* Takes a scanner and write options rather than a dataset instance. */
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_system_dataset_write_scanner(
+  GADatasetScanner *scanner,
+  GADatasetFileSystemDatasetWriteOptions *options,
+  GError **error);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp b/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp
new file mode 100644
index 000000000..1dab391e8
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/dataset.hpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/dataset.h> + + +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset); +GADatasetDataset * +gadataset_dataset_new_raw( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset, + const gchar *first_property_name, + ...); +GADatasetDataset * +gadataset_dataset_new_raw_valist( + std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset, + const gchar *first_property_name, + va_list arg); +std::shared_ptr<arrow::dataset::Dataset> +gadataset_dataset_get_raw(GADatasetDataset *dataset); + + +arrow::dataset::FileSystemDatasetWriteOptions * +gadataset_file_system_dataset_write_options_get_raw( + GADatasetFileSystemDatasetWriteOptions *options); diff --git a/src/arrow/c_glib/arrow-dataset-glib/enums.c.template b/src/arrow/c_glib/arrow-dataset-glib/enums.c.template new file mode 100644 index 000000000..8921ab062 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/enums.c.template @@ -0,0 +1,52 @@ +/*** BEGIN file-header ***/ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-dataset-glib/arrow-dataset-glib.h> +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType +@enum_name@_get_type(void) +{ + static GType etype = 0; + if (G_UNLIKELY(etype == 0)) { + static const G@Type@Value values[] = { +/*** END value-header ***/ + +/*** BEGIN value-production ***/ + {@VALUENAME@, "@VALUENAME@", "@valuenick@"}, +/*** END value-production ***/ + +/*** BEGIN value-tail ***/ + {0, NULL, NULL} + }; + etype = g_@type@_register_static(g_intern_static_string("@EnumName@"), values); + } + return etype; +} +/*** END value-tail ***/ + +/*** BEGIN file-tail ***/ +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/arrow-dataset-glib/enums.h.template b/src/arrow/c_glib/arrow-dataset-glib/enums.h.template new file mode 100644 index 000000000..d6a0a455f --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/enums.h.template @@ -0,0 +1,41 @@ +/*** BEGIN file-header ***/ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-dataset-glib/partitioning.h> + +G_BEGIN_DECLS +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType @enum_name@_get_type(void) G_GNUC_CONST; +#define @ENUMPREFIX@_TYPE_@ENUMSHORT@ (@enum_name@_get_type()) +/*** END value-header ***/ + +/*** BEGIN file-tail ***/ + +G_END_DECLS +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp b/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp new file mode 100644 index 000000000..c0c92d966 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.cpp @@ -0,0 +1,574 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/error.hpp> +#include <arrow-glib/file-system.hpp> +#include <arrow-glib/output-stream.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/reader.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow-dataset-glib/file-format.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: file-format + * @section_id: file-format + * @title: File format classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetFileWriteOptions is a class for options to write a file + * of this format. + * + * #GADatasetFileWriter is a class for writing a file of this format. + * + * #GADatasetFileFormat is a base class for file format classes. + * + * #GADatasetCSVFileFormat is a class for CSV file format. + * + * #GADatasetIPCFileFormat is a class for IPC file format. + * + * #GADatasetParquetFileFormat is a class for Parquet file format. + * + * Since: 3.0.0 + */ + +typedef struct GADatasetFileWriteOptionsPrivate_ { + std::shared_ptr<arrow::dataset::FileWriteOptions> options; +} GADatasetFileWriteOptionsPrivate; + +enum { + PROP_OPTIONS = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileWriteOptions, + gadataset_file_write_options, + G_TYPE_OBJECT) + +#define GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GADatasetFileWriteOptionsPrivate *>( \ + gadataset_file_write_options_get_instance_private( \ + GADATASET_FILE_WRITE_OPTIONS(obj))) + +static void +gadataset_file_write_options_finalize(GObject *object) +{ + auto priv = GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object); + priv->options.~shared_ptr(); + G_OBJECT_CLASS(gadataset_file_write_options_parent_class)->finalize(object); +} + +static void +gadataset_file_write_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_OPTIONS: + priv->options = + *static_cast<std::shared_ptr<arrow::dataset::FileWriteOptions> *>( + 
g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_write_options_init(GADatasetFileWriteOptions *object) +{ + auto priv = GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(object); + new(&priv->options) std::shared_ptr<arrow::dataset::FileWriteOptions>; +} + +static void +gadataset_file_write_options_class_init(GADatasetFileWriteOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gadataset_file_write_options_finalize; + gobject_class->set_property = gadataset_file_write_options_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("options", + "Options", + "The raw " + "std::shared<arrow::dataset::FileWriteOptions> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_OPTIONS, spec); +} + + +typedef struct GADatasetFileWriterPrivate_ { + std::shared_ptr<arrow::dataset::FileWriter> writer; +} GADatasetFileWriterPrivate; + +enum { + PROP_WRITER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileWriter, + gadataset_file_writer, + G_TYPE_OBJECT) + +#define GADATASET_FILE_WRITER_GET_PRIVATE(obj) \ + static_cast<GADatasetFileWriterPrivate *>( \ + gadataset_file_writer_get_instance_private( \ + GADATASET_FILE_WRITER(obj))) + +static void +gadataset_file_writer_finalize(GObject *object) +{ + auto priv = GADATASET_FILE_WRITER_GET_PRIVATE(object); + priv->writer.~shared_ptr(); + G_OBJECT_CLASS(gadataset_file_writer_parent_class)->finalize(object); +} + +static void +gadataset_file_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_WRITER: + priv->writer = + *static_cast<std::shared_ptr<arrow::dataset::FileWriter> *>( + g_value_get_pointer(value)); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_writer_init(GADatasetFileWriter *object) +{ + auto priv = GADATASET_FILE_WRITER_GET_PRIVATE(object); + new(&(priv->writer)) std::shared_ptr<arrow::dataset::FileWriter>; +} + +static void +gadataset_file_writer_class_init(GADatasetFileWriterClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gadataset_file_writer_finalize; + gobject_class->set_property = gadataset_file_writer_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("writer", + "Writer", + "The raw " + "std::shared<arrow::dataset::FileWriter> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_WRITER, spec); +} + +/** + * gadataset_file_writer_write_record_batch: + * @writer: A #GADatasetFileWriter. + * @record_batch: A #GArrowRecordBatch to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer, + GArrowRecordBatch *record_batch, + GError **error) +{ + const auto arrow_writer = gadataset_file_writer_get_raw(writer); + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto status = arrow_writer->Write(arrow_record_batch); + return garrow::check(error, status, "[file-writer][write-record-batch]"); +} + +/** + * gadataset_file_writer_write_record_batch_reader: + * @writer: A #GADatasetFileWriter. + * @reader: A #GArrowRecordBatchReader to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 6.0.0 + */ +gboolean +gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer, + GArrowRecordBatchReader *reader, + GError **error) +{ + const auto arrow_writer = gadataset_file_writer_get_raw(writer); + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + auto status = arrow_writer->Write(arrow_reader.get()); + return garrow::check(error, + status, + "[file-writer][write-record-batch-reader]"); +} + +/** + * gadataset_file_writer_finish: + * @writer: A #GADatasetFileWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +gadataset_file_writer_finish(GADatasetFileWriter *writer, + GError **error) +{ + const auto arrow_writer = gadataset_file_writer_get_raw(writer); + auto status = arrow_writer->Finish(); + return garrow::check(error, + status, + "[file-writer][finish]"); +} + + +typedef struct GADatasetFileFormatPrivate_ { + std::shared_ptr<arrow::dataset::FileFormat> format; +} GADatasetFileFormatPrivate; + +enum { + PROP_FORMAT = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileFormat, + gadataset_file_format, + G_TYPE_OBJECT) + +#define GADATASET_FILE_FORMAT_GET_PRIVATE(obj) \ + static_cast<GADatasetFileFormatPrivate *>( \ + gadataset_file_format_get_instance_private( \ + GADATASET_FILE_FORMAT(obj))) + +static void +gadataset_file_format_finalize(GObject *object) +{ + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); + priv->format.~shared_ptr(); + G_OBJECT_CLASS(gadataset_file_format_parent_class)->finalize(object); +} + +static void +gadataset_file_format_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FORMAT: + priv->format = + *static_cast<std::shared_ptr<arrow::dataset::FileFormat> *>( + g_value_get_pointer(value)); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_file_format_init(GADatasetFileFormat *object) +{ + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object); + new(&priv->format) std::shared_ptr<arrow::dataset::FileFormat>; +} + +static void +gadataset_file_format_class_init(GADatasetFileFormatClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gadataset_file_format_finalize; + gobject_class->set_property = gadataset_file_format_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("format", + "Format", + "The raw std::shared<arrow::dataset::FileFormat> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FORMAT, spec); +} + +/** + * gadataset_file_format_get_type_name: + * @format: A #GADatasetFileFormat. + * + * Returns: The type name of @format. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +gadataset_file_format_get_type_name(GADatasetFileFormat *format) +{ + const auto arrow_format = gadataset_file_format_get_raw(format); + const auto &type_name = arrow_format->type_name(); + return g_strndup(type_name.data(), type_name.size()); +} + +/** + * gadataset_file_format_get_default_write_options: + * @format: A #GADatasetFileFormat. + * + * Returns: (transfer full): The default #GADatasetFileWriteOptions of @format. + * + * Since: 6.0.0 + */ +GADatasetFileWriteOptions * +gadataset_file_format_get_default_write_options(GADatasetFileFormat *format) +{ + const auto arrow_format = gadataset_file_format_get_raw(format); + auto arrow_options = arrow_format->DefaultWriteOptions(); + return gadataset_file_write_options_new_raw(&arrow_options); +} + +/** + * gadataset_file_format_open_writer: + * @format: A #GADatasetFileFormat. + * @destination: A #GArrowOutputStream. + * @file_system: The #GArrowFileSystem of @destination. 
+ * @path: The path of @destination. + * @schema: A #GArrowSchema that is used by written record batches. + * @options: A #GADatasetFileWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The newly created #GADatasetFileWriter of @format + * on success, %NULL on error. + * + * Since: 6.0.0 + */ +GADatasetFileWriter * +gadataset_file_format_open_writer(GADatasetFileFormat *format, + GArrowOutputStream *destination, + GArrowFileSystem *file_system, + const gchar *path, + GArrowSchema *schema, + GADatasetFileWriteOptions *options, + GError **error) +{ + const auto arrow_format = gadataset_file_format_get_raw(format); + auto arrow_destination = garrow_output_stream_get_raw(destination); + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_options = gadataset_file_write_options_get_raw(options); + auto arrow_writer_result = + arrow_format->MakeWriter(arrow_destination, + arrow_schema, + arrow_options, + {arrow_file_system, path}); + if (garrow::check(error, arrow_writer_result, "[file-format][open-writer]")) { + auto arrow_writer = *arrow_writer_result; + return gadataset_file_writer_new_raw(&arrow_writer); + } else { + return NULL; + } +} + +/** + * gadataset_file_format_equal: + * @format: A #GADatasetFileFormat. + * @other_format: A #GADatasetFileFormat to be compared. + * + * Returns: %TRUE if they are the same content file format, %FALSE otherwise. 
+ *
+ * Since: 3.0.0
+ */
+gboolean
+gadataset_file_format_equal(GADatasetFileFormat *format,
+                            GADatasetFileFormat *other_format)
+{
+  /* Compare the wrapped Arrow formats, not the GObject wrappers. */
+  const auto lhs = gadataset_file_format_get_raw(format);
+  const auto rhs = gadataset_file_format_get_raw(other_format);
+  const auto &rhs_format = *rhs;
+  return lhs->Equals(rhs_format);
+}
+
+
+G_DEFINE_TYPE(GADatasetCSVFileFormat,
+              gadataset_csv_file_format,
+              GADATASET_TYPE_FILE_FORMAT)
+
+/* The CSV subclass adds no instance state of its own. */
+static void
+gadataset_csv_file_format_init(GADatasetCSVFileFormat *object)
+{
+}
+
+/* The CSV subclass overrides no virtual functions. */
+static void
+gadataset_csv_file_format_class_init(GADatasetCSVFileFormatClass *klass)
+{
+}
+
+/**
+ * gadataset_csv_file_format_new:
+ *
+ * Returns: The newly created CSV file format.
+ *
+ * Since: 3.0.0
+ */
+GADatasetCSVFileFormat *
+gadataset_csv_file_format_new(void)
+{
+  /* Hold the format as a base-class shared_ptr, which is the type that
+   * gadataset_file_format_new_raw() takes. */
+  auto arrow_csv_format = std::make_shared<arrow::dataset::CsvFileFormat>();
+  std::shared_ptr<arrow::dataset::FileFormat> arrow_format = arrow_csv_format;
+  auto format = gadataset_file_format_new_raw(&arrow_format);
+  return GADATASET_CSV_FILE_FORMAT(format);
+}
+
+
+G_DEFINE_TYPE(GADatasetIPCFileFormat,
+              gadataset_ipc_file_format,
+              GADATASET_TYPE_FILE_FORMAT)
+
+/* The IPC subclass adds no instance state of its own. */
+static void
+gadataset_ipc_file_format_init(GADatasetIPCFileFormat *object)
+{
+}
+
+/* The IPC subclass overrides no virtual functions. */
+static void
+gadataset_ipc_file_format_class_init(GADatasetIPCFileFormatClass *klass)
+{
+}
+
+/**
+ * gadataset_ipc_file_format_new:
+ *
+ * Returns: The newly created IPC file format.
+ * + * Since: 3.0.0 + */ +GADatasetIPCFileFormat * +gadataset_ipc_file_format_new(void) +{ + std::shared_ptr<arrow::dataset::FileFormat> arrow_format = + std::make_shared<arrow::dataset::IpcFileFormat>(); + return GADATASET_IPC_FILE_FORMAT(gadataset_file_format_new_raw(&arrow_format)); +} + + +G_DEFINE_TYPE(GADatasetParquetFileFormat, + gadataset_parquet_file_format, + GADATASET_TYPE_FILE_FORMAT) + +static void +gadataset_parquet_file_format_init(GADatasetParquetFileFormat *object) +{ +} + +static void +gadataset_parquet_file_format_class_init(GADatasetParquetFileFormatClass *klass) +{ +} + +/** + * gadataset_parquet_file_format_new: + * + * Returns: The newly created Parquet file format. + * + * Since: 3.0.0 + */ +GADatasetParquetFileFormat * +gadataset_parquet_file_format_new(void) +{ + std::shared_ptr<arrow::dataset::FileFormat> arrow_format = + std::make_shared<arrow::dataset::ParquetFileFormat>(); + return GADATASET_PARQUET_FILE_FORMAT( + gadataset_file_format_new_raw(&arrow_format)); +} + + +G_END_DECLS + +GADatasetFileWriteOptions * +gadataset_file_write_options_new_raw( + std::shared_ptr<arrow::dataset::FileWriteOptions> *arrow_options) +{ + return GADATASET_FILE_WRITE_OPTIONS( + g_object_new(GADATASET_TYPE_FILE_WRITE_OPTIONS, + "options", arrow_options, + NULL)); +} + +std::shared_ptr<arrow::dataset::FileWriteOptions> +gadataset_file_write_options_get_raw(GADatasetFileWriteOptions *options) +{ + auto priv = GADATASET_FILE_WRITE_OPTIONS_GET_PRIVATE(options); + return priv->options; +} + + +GADatasetFileWriter * +gadataset_file_writer_new_raw( + std::shared_ptr<arrow::dataset::FileWriter> *arrow_writer) +{ + return GADATASET_FILE_WRITER(g_object_new(GADATASET_TYPE_FILE_WRITER, + "writer", arrow_writer, + NULL)); +} + +std::shared_ptr<arrow::dataset::FileWriter> +gadataset_file_writer_get_raw(GADatasetFileWriter *writer) +{ + auto priv = GADATASET_FILE_WRITER_GET_PRIVATE(writer); + return priv->writer; +} + + +GADatasetFileFormat * 
+gadataset_file_format_new_raw( + std::shared_ptr<arrow::dataset::FileFormat> *arrow_format) +{ + GType type = GADATASET_TYPE_FILE_FORMAT; + const auto &type_name = (*arrow_format)->type_name(); + if (type_name == "csv") { + type = GADATASET_TYPE_CSV_FILE_FORMAT; + } else if (type_name == "ipc") { + type = GADATASET_TYPE_IPC_FILE_FORMAT; + } else if (type_name == "parquet") { + type = GADATASET_TYPE_PARQUET_FILE_FORMAT; + } + return GADATASET_FILE_FORMAT(g_object_new(type, + "format", arrow_format, + NULL)); +} + +std::shared_ptr<arrow::dataset::FileFormat> +gadataset_file_format_get_raw(GADatasetFileFormat *format) +{ + auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(format); + return priv->format; +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.h b/src/arrow/c_glib/arrow-dataset-glib/file-format.h new file mode 100644 index 000000000..16a834074 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.h @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+/* GADatasetFileWriteOptions: derivable type whose parent is GObject. */
+#define GADATASET_TYPE_FILE_WRITE_OPTIONS \
+  (gadataset_file_write_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriteOptions,
+                         gadataset_file_write_options,
+                         GADATASET,
+                         FILE_WRITE_OPTIONS,
+                         GObject)
+struct _GADatasetFileWriteOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+/* GADatasetFileWriter: derivable type whose parent is GObject. */
+#define GADATASET_TYPE_FILE_WRITER \
+  (gadataset_file_writer_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriter,
+                         gadataset_file_writer,
+                         GADATASET,
+                         FILE_WRITER,
+                         GObject)
+struct _GADatasetFileWriterClass
+{
+  GObjectClass parent_class;
+};
+/* Writer functions, all marked available since 6.0. */
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer,
+                                         GArrowRecordBatch *record_batch,
+                                         GError **error);
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer,
+                                                GArrowRecordBatchReader *reader,
+                                                GError **error);
+GARROW_AVAILABLE_IN_6_0
+gboolean
+gadataset_file_writer_finish(GADatasetFileWriter *writer,
+                             GError **error);
+
+/* GADatasetFileFormat: derivable base type of the CSV/IPC/Parquet types below. */
+#define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileFormat,
+                         gadataset_file_format,
+                         GADATASET,
+                         FILE_FORMAT,
+                         GObject)
+struct _GADatasetFileFormatClass
+{
+  GObjectClass parent_class;
+};
+/* get_type_name is marked 3.0; the two write-related functions are marked 6.0. */
+GARROW_AVAILABLE_IN_3_0
+gchar *
+gadataset_file_format_get_type_name(GADatasetFileFormat *format);
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileWriteOptions *
+gadataset_file_format_get_default_write_options(GADatasetFileFormat *format);
+GARROW_AVAILABLE_IN_6_0
+GADatasetFileWriter *
+gadataset_file_format_open_writer(GADatasetFileFormat *format,
+                                  GArrowOutputStream *destination,
+                                  GArrowFileSystem *file_system,
+                                  const gchar *path,
+                                  GArrowSchema *schema,
+                                  GADatasetFileWriteOptions *options,
+                                  GError **error);
+
+GARROW_AVAILABLE_IN_3_0
+gboolean
+gadataset_file_format_equal(GADatasetFileFormat *format,
+                            GADatasetFileFormat *other_format);
+
+/* GADatasetCSVFileFormat: derivable type whose parent is GADatasetFileFormat. */
+#define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat,
+                         gadataset_csv_file_format,
+                         GADATASET,
+                         CSV_FILE_FORMAT,
+                         GADatasetFileFormat)
+struct _GADatasetCSVFileFormatClass
+{
+  GADatasetFileFormatClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_3_0
+GADatasetCSVFileFormat *gadataset_csv_file_format_new(void);
+
+/* GADatasetIPCFileFormat: derivable type whose parent is GADatasetFileFormat. */
+#define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat,
+                         gadataset_ipc_file_format,
+                         GADATASET,
+                         IPC_FILE_FORMAT,
+                         GADatasetFileFormat)
+struct _GADatasetIPCFileFormatClass
+{
+  GADatasetFileFormatClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_3_0
+GADatasetIPCFileFormat *gadataset_ipc_file_format_new(void);
+
+/* GADatasetParquetFileFormat: derivable type whose parent is GADatasetFileFormat. */
+#define GADATASET_TYPE_PARQUET_FILE_FORMAT \
+  (gadataset_parquet_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat,
+                         gadataset_parquet_file_format,
+                         GADATASET,
+                         PARQUET_FILE_FORMAT,
+                         GADatasetFileFormat)
+struct _GADatasetParquetFileFormatClass
+{
+  GADatasetFileFormatClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_3_0
+GADatasetParquetFileFormat *gadataset_parquet_file_format_new(void);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp b/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp
new file mode 100644
index 000000000..636dc5c01
--- /dev/null
+++ b/src/arrow/c_glib/arrow-dataset-glib/file-format.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/file-format.h> + +GADatasetFileWriteOptions * +gadataset_file_write_options_new_raw( + std::shared_ptr<arrow::dataset::FileWriteOptions> *arrow_options); +std::shared_ptr<arrow::dataset::FileWriteOptions> +gadataset_file_write_options_get_raw(GADatasetFileWriteOptions *options); + + +GADatasetFileWriter * +gadataset_file_writer_new_raw( + std::shared_ptr<arrow::dataset::FileWriter> *arrow_writer); +std::shared_ptr<arrow::dataset::FileWriter> +gadataset_file_writer_get_raw(GADatasetFileWriter *writer); + + +GADatasetFileFormat * +gadataset_file_format_new_raw( + std::shared_ptr<arrow::dataset::FileFormat> *arrow_format); +std::shared_ptr<arrow::dataset::FileFormat> +gadataset_file_format_get_raw(GADatasetFileFormat *format); diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp b/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp new file mode 100644 index 000000000..f2f0cd1c3 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.cpp @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow-dataset-glib/fragment.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: fragment + * @section_id: fragment + * @title: Fragment classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetFragment is a base class for all fragment classes. + * + * #GADatasetInMemoryFragment is a class for in-memory fragment. + * + * Since: 4.0.0 + */ + +/* arrow::dataset::Fragment */ + +typedef struct GADatasetFragmentPrivate_ { + std::shared_ptr<arrow::dataset::Fragment> fragment; +} GADatasetFragmentPrivate; + +enum { + PROP_FRAGMENT = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetFragment, + gadataset_fragment, + G_TYPE_OBJECT) + +#define GADATASET_FRAGMENT_GET_PRIVATE(obj) \ + static_cast<GADatasetFragmentPrivate *>( \ + gadataset_fragment_get_instance_private( \ + GADATASET_FRAGMENT(obj))) + +static void +gadataset_fragment_finalize(GObject *object) +{ + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); + + priv->fragment.~shared_ptr(); + + G_OBJECT_CLASS(gadataset_fragment_parent_class)->finalize(object); +} + +static void +gadataset_fragment_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FRAGMENT: + priv->fragment = + *static_cast<std::shared_ptr<arrow::dataset::Fragment> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + 
+/* Builds the shared_ptr member in place with placement new; the matching + * explicit destructor call lives in gadataset_fragment_finalize(). + */ +static void +gadataset_fragment_init(GADatasetFragment *object) +{ + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object); + new(&priv->fragment) std::shared_ptr<arrow::dataset::Fragment>; +} + +/* Wires up the GObject vfuncs and installs the write-only, construct-only + * "fragment" property that carries the wrapped Arrow object. + */ +static void +gadataset_fragment_class_init(GADatasetFragmentClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gadataset_fragment_finalize; + gobject_class->set_property = gadataset_fragment_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("fragment", + "Fragment", + "The raw std::shared_ptr<arrow::dataset::Fragment> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FRAGMENT, spec); +} + +/* arrow::dataset::InMemoryFragment */ + +G_DEFINE_TYPE(GADatasetInMemoryFragment, + gadataset_in_memory_fragment, + GADATASET_TYPE_FRAGMENT) + +static void +gadataset_in_memory_fragment_init(GADatasetInMemoryFragment *object) +{ +} + +static void +gadataset_in_memory_fragment_class_init(GADatasetInMemoryFragmentClass *klass) +{ +} + +/** + * gadataset_in_memory_fragment_new: + * @schema: A #GArrowSchema. + * @record_batches: (array length=n_record_batches): + * (element-type GArrowRecordBatch): The record batches of the table. + * @n_record_batches: The number of record batches. + * + * Returns: A newly created #GADatasetInMemoryFragment.
+ * + * Since: 4.0.0 + */ +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + /* Unwrap every GArrowRecordBatch into its underlying Arrow object. */ + std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches; + arrow_record_batches.reserve(n_record_batches); + for (gsize i = 0; i < n_record_batches; ++i) { + auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]); + arrow_record_batches.push_back(arrow_record_batch); + } + auto arrow_in_memory_fragment = + std::make_shared<arrow::dataset::InMemoryFragment>(arrow_schema, + arrow_record_batches); + return gadataset_in_memory_fragment_new_raw(&arrow_in_memory_fragment); +} + +G_END_DECLS + +/* Wraps an existing Arrow fragment in a GObject. + * + * GADatasetFragment is registered as an abstract type, so the concrete + * wrapper class is chosen from the dynamic type of the Arrow object: an + * arrow::dataset::InMemoryFragment is wrapped as GADatasetInMemoryFragment. + * Any other fragment kind still falls back to the base type, as before. + */ +GADatasetFragment * +gadataset_fragment_new_raw( + std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment) +{ + GType type = GADATASET_TYPE_FRAGMENT; + if (std::dynamic_pointer_cast<arrow::dataset::InMemoryFragment>( + *arrow_fragment)) { + type = GADATASET_TYPE_IN_MEMORY_FRAGMENT; + } + auto fragment = + GADATASET_FRAGMENT(g_object_new(type, + "fragment", arrow_fragment, + NULL)); + return fragment; +} + +/* Returns the wrapped Arrow fragment (a copy of the stored shared_ptr). */ +std::shared_ptr<arrow::dataset::Fragment> +gadataset_fragment_get_raw(GADatasetFragment *fragment) +{ + auto priv = GADATASET_FRAGMENT_GET_PRIVATE(fragment); + return priv->fragment; +} + +/* Wraps an existing Arrow in-memory fragment in a GObject. + * + * The "fragment" property setter reads the pointer back as + * std::shared_ptr<arrow::dataset::Fragment> *, so the value is converted to + * the base pointer type first rather than handing over a pointer to a + * differently typed shared_ptr. + */ +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new_raw( + std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment) +{ + std::shared_ptr<arrow::dataset::Fragment> arrow_base_fragment = + *arrow_fragment; + auto fragment = + GADATASET_IN_MEMORY_FRAGMENT(g_object_new(GADATASET_TYPE_IN_MEMORY_FRAGMENT, + "fragment", &arrow_base_fragment, + NULL)); + return fragment; +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.h b/src/arrow/c_glib/arrow-dataset-glib/fragment.h new file mode 100644 index 000000000..9376b6cf3 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.h @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +/* arrow::dataset::Fragment */ + +#define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetFragment, + gadataset_fragment, + GADATASET, + FRAGMENT, + GObject) +struct _GADatasetFragmentClass +{ + GObjectClass parent_class; +}; + +/* arrow::dataset::InMemoryFragment */ + +#define GADATASET_TYPE_IN_MEMORY_FRAGMENT \ + (gadataset_in_memory_fragment_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment, + gadataset_in_memory_fragment, + GADATASET, + IN_MEMORY_FRAGMENT, + GADatasetFragment) +struct _GADatasetInMemoryFragmentClass +{ + GADatasetFragmentClass parent_class; +}; + +GARROW_AVAILABLE_IN_4_0 +GADatasetInMemoryFragment * +gadataset_in_memory_fragment_new(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp b/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp new file mode 100644 index 000000000..904f83653 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/fragment.hpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/fragment.h> + +std::shared_ptr<arrow::dataset::Fragment> +gadataset_fragment_get_raw(GADatasetFragment *fragment); + +GADatasetFragment* +gadataset_fragment_new_raw( + std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment); + +GADatasetInMemoryFragment* +gadataset_in_memory_fragment_new_raw( + std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment); diff --git a/src/arrow/c_glib/arrow-dataset-glib/meson.build b/src/arrow/c_glib/arrow-dataset-glib/meson.build new file mode 100644 index 000000000..0d9b8564e --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/meson.build @@ -0,0 +1,104 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +sources = files( + 'dataset-factory.cpp', + 'dataset.cpp', + 'file-format.cpp', + 'fragment.cpp', + 'partitioning.cpp', + 'scanner.cpp', +) + +c_headers = files( + 'arrow-dataset-glib.h', + 'dataset-factory.h', + 'dataset.h', + 'file-format.h', + 'fragment.h', + 'partitioning.h', + 'scanner.h', +) + +cpp_headers = files( + 'arrow-dataset-glib.hpp', + 'dataset-factory.hpp', + 'dataset.hpp', + 'file-format.hpp', + 'fragment.hpp', + 'partitioning.hpp', + 'scanner.hpp', +) + +enums = gnome.mkenums('enums', + sources: c_headers, + identifier_prefix: 'GADataset', + symbol_prefix: 'gadataset', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, meson.project_name()), + install_header: true) +enums_source = enums[0] +enums_header = enums[1] + + +headers = c_headers + cpp_headers +install_headers(headers, subdir: 'arrow-dataset-glib') + +dependencies = [ + arrow_dataset, + arrow_glib, +] +libarrow_dataset_glib = library('arrow-dataset-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header) + +pkgconfig.generate(libarrow_dataset_glib, + filebase: 'arrow-dataset-glib', + name: 'Apache Arrow Dataset GLib', + description: 'C API for Apache Arrow Dataset based on GLib', + version: version, + requires: 
['arrow-glib', 'arrow-dataset']) + +if have_gi + gnome.generate_gir(libarrow_dataset_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + sources: sources + c_headers + enums, + namespace: 'ArrowDataset', + nsversion: api_version, + identifier_prefix: 'GADataset', + symbol_prefix: 'gadataset', + export_packages: 'arrow-dataset-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ]) +endif diff --git a/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp b/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp new file mode 100644 index 000000000..bce33671a --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.cpp @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow-dataset-glib/enums.h> +#include <arrow-dataset-glib/partitioning.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: partitioning + * @section_id: partitioning + * @title: Partitioning classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetPartitioningOptions is a class for partitioning options. + * + * #GADatasetPartitioning is a base class for partitioning classes + * such as #GADatasetDirectoryPartitioning. + * + * #GADatasetKeyValuePartitioning is a base class for key-value style + * partitioning classes such as #GADatasetDirectoryPartitioning. + * + * #GADatasetDirectoryPartitioning is a class for partitioning that + * uses directory structure. + * + * Since: 6.0.0 + */ + +typedef struct GADatasetPartitioningOptionsPrivate_ { + gboolean infer_dictionary; + GArrowSchema *schema; + GADatasetSegmentEncoding segment_encoding; +} GADatasetPartitioningOptionsPrivate; + +enum { + PROP_INFER_DICTIONARY = 1, + PROP_SCHEMA, + PROP_SEGMENT_ENCODING, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetPartitioningOptions, + gadataset_partitioning_options, + G_TYPE_OBJECT) + +#define GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GADatasetPartitioningOptionsPrivate *>( \ + gadataset_partitioning_options_get_instance_private( \ + GADATASET_PARTITIONING_OPTIONS(obj))) + +static void +gadataset_partitioning_options_dispose(GObject *object) +{ + auto priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object); + + if (priv->schema) { + g_object_unref(priv->schema); + priv->schema = nullptr; + } + + G_OBJECT_CLASS(gadataset_partitioning_options_parent_class)->dispose(object); +} + +static void +gadataset_partitioning_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INFER_DICTIONARY: 
+ priv->infer_dictionary = g_value_get_boolean(value); + break; + case PROP_SCHEMA: + { + auto schema = g_value_get_object(value); + if (priv->schema == schema) { + break; + } + auto old_schema = priv->schema; + if (schema) { + g_object_ref(schema); + priv->schema = GARROW_SCHEMA(schema); + } else { + priv->schema = NULL; + } + if (old_schema) { + g_object_unref(old_schema); + } + } + break; + case PROP_SEGMENT_ENCODING: + priv->segment_encoding = + static_cast<GADatasetSegmentEncoding>(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_partitioning_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INFER_DICTIONARY: + g_value_set_boolean(value, priv->infer_dictionary); + break; + case PROP_SCHEMA: + g_value_set_object(value, priv->schema); + break; + case PROP_SEGMENT_ENCODING: + g_value_set_enum(value, priv->segment_encoding); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Seeds the private fields with Arrow's own defaults. + * + * The properties are installed as plain G_PARAM_READWRITE (no + * G_PARAM_CONSTRUCT), so the defaults declared in their param specs are not + * applied through set_property() at construction time. Copying them here + * keeps a freshly created object in agreement with the defaults that + * class_init() advertises. + */ +static void +gadataset_partitioning_options_init(GADatasetPartitioningOptions *object) +{ + auto priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(object); + arrow::dataset::PartitioningFactoryOptions default_options; + priv->infer_dictionary = default_options.infer_dictionary; + priv->schema = nullptr; + priv->segment_encoding = + static_cast<GADatasetSegmentEncoding>(default_options.segment_encoding); +} + +static void +gadataset_partitioning_options_class_init( + GADatasetPartitioningOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gadataset_partitioning_options_dispose; + gobject_class->set_property = gadataset_partitioning_options_set_property; + gobject_class->get_property = gadataset_partitioning_options_get_property; + + arrow::dataset::PartitioningFactoryOptions default_options; + GParamSpec *spec; + /** + * GADatasetPartitioningOptions:infer-dictionary: + * + * When inferring a schema for partition fields, yield dictionary + * encoded types instead of plain.
This can be more efficient when + * materializing virtual columns, and Expressions parsed by the + * finished Partitioning will include dictionaries of all unique + * inspected values for each field. + * + * Since: 6.0.0 + */ + spec = g_param_spec_boolean("infer-dictionary", + "Infer dictionary", + "Whether encode partitioned field values as " + "dictionary", + default_options.infer_dictionary, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_INFER_DICTIONARY, spec); + + /** + * GADatasetPartitioningOptions:schema: + * + * Optionally, an expected schema can be provided, in which case + * inference will only check discovered fields against the schema + * and update internal state (such as dictionaries). + * + * Since: 6.0.0 + */ + spec = g_param_spec_object("schema", + "Schema", + "Inference will only check discovered fields " + "against the schema and update internal state", + GARROW_TYPE_SCHEMA, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SCHEMA, spec); + + /** + * GADatasetPartitioningOptions:segment-encoding: + * + * After splitting a path into components, decode the path + * components before parsing according to this scheme. + * + * Since: 6.0.0 + */ + spec = g_param_spec_enum("segment-encoding", + "Segment encoding", + "After splitting a path into components, " + "decode the path components before " + "parsing according to this scheme", + GADATASET_TYPE_SEGMENT_ENCODING, + static_cast<GADatasetSegmentEncoding>( + default_options.segment_encoding), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SEGMENT_ENCODING, spec); +} + +/** + * gadataset_partitioning_options_new: + * + * Returns: The newly created #GADatasetPartitioningOptions. 
+ * + * Since: 6.0.0 + */ +GADatasetPartitioningOptions * +gadataset_partitioning_options_new(void) +{ + return GADATASET_PARTITIONING_OPTIONS( + g_object_new(GADATASET_TYPE_PARTITIONING_OPTIONS, + NULL)); +} + + +typedef struct GADatasetPartitioningPrivate_ { + std::shared_ptr<arrow::dataset::Partitioning> partitioning; +} GADatasetPartitioningPrivate; + +enum { + PROP_PARTITIONING = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetPartitioning, + gadataset_partitioning, + G_TYPE_OBJECT) + +#define GADATASET_PARTITIONING_GET_PRIVATE(obj) \ + static_cast<GADatasetPartitioningPrivate *>( \ + gadataset_partitioning_get_instance_private( \ + GADATASET_PARTITIONING(obj))) + +static void +gadataset_partitioning_finalize(GObject *object) +{ + auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object); + priv->partitioning.~shared_ptr(); + G_OBJECT_CLASS(gadataset_partitioning_parent_class)->finalize(object); +} + +static void +gadataset_partitioning_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_PARTITIONING: + priv->partitioning = + *static_cast<std::shared_ptr<arrow::dataset::Partitioning> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_partitioning_init(GADatasetPartitioning *object) +{ + auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object); + new(&priv->partitioning) std::shared_ptr<arrow::dataset::Partitioning>; +} + +static void +gadataset_partitioning_class_init(GADatasetPartitioningClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gadataset_partitioning_finalize; + gobject_class->set_property = gadataset_partitioning_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("partitioning", + "Partitioning", + "The raw " + "std::shared<arrow::dataset::Partitioning> *", + 
static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec); +} + +/** + * gadataset_partitioning_new: + * + * Returns: The newly created #GADatasetPartitioning that doesn't + * partition. + * + * Since: 6.0.0 + */ +GADatasetPartitioning * +gadataset_partitioning_new(void) +{ + auto arrow_partitioning = arrow::dataset::Partitioning::Default(); + return GADATASET_PARTITIONING( + g_object_new(GADATASET_TYPE_PARTITIONING, + "partitioning", &arrow_partitioning, + NULL)); +} + +/** + * gadataset_partitioning_get_type_name: + * @partitioning: A #GADatasetPartitioning. + * + * Returns: The type name of @partitioning. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 6.0.0 + */ +gchar * +gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning) +{ + auto arrow_partitioning = gadataset_partitioning_get_raw(partitioning); + auto arrow_type_name = arrow_partitioning->type_name(); + return g_strndup(arrow_type_name.c_str(), + arrow_type_name.size()); +} + + +G_DEFINE_TYPE(GADatasetKeyValuePartitioning, + gadataset_key_value_partitioning, + GADATASET_TYPE_PARTITIONING) + +static void +gadataset_key_value_partitioning_init(GADatasetKeyValuePartitioning *object) +{ +} + +static void +gadataset_key_value_partitioning_class_init( + GADatasetKeyValuePartitioningClass *klass) +{ +} + + +G_DEFINE_TYPE(GADatasetDirectoryPartitioning, + gadataset_directory_partitioning, + GADATASET_TYPE_KEY_VALUE_PARTITIONING) + +static void +gadataset_directory_partitioning_init(GADatasetDirectoryPartitioning *object) +{ +} + +static void +gadataset_directory_partitioning_class_init( + GADatasetDirectoryPartitioningClass *klass) +{ +} + +/** + * gadataset_directory_partitioning_new: + * @schema: A #GArrowSchema that describes all partitioned segments. + * @dictionaries: (nullable) (element-type GArrowArray): A list of #GArrowArray + * for dictionary data types in @schema. 
+ * @options: (nullable): A #GADatasetPartitioningOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The newly created #GADatasetDirectoryPartitioning on success, + * %NULL on error. + * + * Since: 6.0.0 + */ +GADatasetDirectoryPartitioning * +gadataset_directory_partitioning_new(GArrowSchema *schema, + GList *dictionaries, + GADatasetPartitioningOptions *options, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector<std::shared_ptr<arrow::Array>> arrow_dictionaries; + for (auto node = dictionaries; node; node = node->next) { + auto dictionary = GARROW_ARRAY(node->data); + if (dictionary) { + arrow_dictionaries.push_back(garrow_array_get_raw(dictionary)); + } else { + arrow_dictionaries.push_back(nullptr); + } + } + arrow::dataset::KeyValuePartitioningOptions arrow_options; + if (options) { + arrow_options = + gadataset_partitioning_options_get_raw_key_value_partitioning_options( + options); + } + auto arrow_partitioning = + std::make_shared<arrow::dataset::DirectoryPartitioning>( + arrow_schema, + arrow_dictionaries, + arrow_options); + return GADATASET_DIRECTORY_PARTITIONING( + g_object_new(GADATASET_TYPE_DIRECTORY_PARTITIONING, + "partitioning", &arrow_partitioning, + NULL)); +} + + +G_END_DECLS + +arrow::dataset::KeyValuePartitioningOptions +gadataset_partitioning_options_get_raw_key_value_partitioning_options( + GADatasetPartitioningOptions *options) +{ + auto priv = GADATASET_PARTITIONING_OPTIONS_GET_PRIVATE(options); + arrow::dataset::KeyValuePartitioningOptions arrow_options; + arrow_options.segment_encoding = + static_cast<arrow::dataset::SegmentEncoding>(priv->segment_encoding); + return arrow_options; +} + +std::shared_ptr<arrow::dataset::Partitioning> +gadataset_partitioning_get_raw(GADatasetPartitioning *partitioning) +{ + auto priv = GADATASET_PARTITIONING_GET_PRIVATE(partitioning); + return priv->partitioning; +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/partitioning.h 
b/src/arrow/c_glib/arrow-dataset-glib/partitioning.h new file mode 100644 index 000000000..d408d9bd5 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.h @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +/** + * GADatasetSegmentEncoding + * @GADATASET_SEGMENT_ENCODING_NONE: No encoding. + * @GADATASET_SEGMENT_ENCODING_URI: Segment values are URL-encoded. + * + * They are corresponding to `arrow::dataset::SegmentEncoding` values. 
+ * + * Since: 6.0.0 + */ +typedef enum { + GADATASET_SEGMENT_ENCODING_NONE, + GADATASET_SEGMENT_ENCODING_URI, +} GADatasetSegmentEncoding; + + +#define GADATASET_TYPE_PARTITIONING_OPTIONS \ + (gadataset_partitioning_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioningOptions, + gadataset_partitioning_options, + GADATASET, + PARTITIONING_OPTIONS, + GObject) +struct _GADatasetPartitioningOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GADatasetPartitioningOptions * +gadataset_partitioning_options_new(void); + + +#define GADATASET_TYPE_PARTITIONING (gadataset_partitioning_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioning, + gadataset_partitioning, + GADATASET, + PARTITIONING, + GObject) +struct _GADatasetPartitioningClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GADatasetPartitioning * +gadataset_partitioning_new(void); +GARROW_AVAILABLE_IN_6_0 +gchar * +gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning); + + +#define GADATASET_TYPE_KEY_VALUE_PARTITIONING \ + (gadataset_key_value_partitioning_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioning, + gadataset_key_value_partitioning, + GADATASET, + KEY_VALUE_PARTITIONING, + GADatasetPartitioning) +struct _GADatasetKeyValuePartitioningClass +{ + GADatasetPartitioningClass parent_class; +}; + + +#define GADATASET_TYPE_DIRECTORY_PARTITIONING \ + (gadataset_directory_partitioning_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetDirectoryPartitioning, + gadataset_directory_partitioning, + GADATASET, + DIRECTORY_PARTITIONING, + GADatasetKeyValuePartitioning) +struct _GADatasetDirectoryPartitioningClass +{ + GADatasetKeyValuePartitioningClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GADatasetDirectoryPartitioning * +gadataset_directory_partitioning_new(GArrowSchema *schema, + GList *dictionaries, + GADatasetPartitioningOptions *options, + GError **error); + + +G_END_DECLS diff --git 
a/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp b/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp new file mode 100644 index 000000000..2481ecb33 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/partitioning.hpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/partitioning.h> + +arrow::dataset::KeyValuePartitioningOptions +gadataset_partitioning_options_get_raw_key_value_partitioning_options( + GADatasetPartitioningOptions *options); + +std::shared_ptr<arrow::dataset::Partitioning> +gadataset_partitioning_get_raw(GADatasetPartitioning *partitioning); diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp new file mode 100644 index 000000000..51542bb0a --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.cpp @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/error.hpp> +#include <arrow-glib/expression.hpp> +#include <arrow-glib/reader.hpp> +#include <arrow-glib/table.hpp> + +#include <arrow-dataset-glib/dataset.hpp> +#include <arrow-dataset-glib/scanner.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: scanner + * @section_id: scanner + * @title: Scanner related classes + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * #GADatasetScanner is a class for scanning dataset. + * + * #GADatasetScannerBuilder is a class for building a scanner. 
+ * + * Since: 5.0.0 + */ + +typedef struct GADatasetScannerPrivate_ { + std::shared_ptr<arrow::dataset::Scanner> scanner; +} GADatasetScannerPrivate; + +enum { + PROP_SCANNER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanner, + gadataset_scanner, + G_TYPE_OBJECT) + +#define GADATASET_SCANNER_GET_PRIVATE(obj) \ + static_cast<GADatasetScannerPrivate *>( \ + gadataset_scanner_get_instance_private( \ + GADATASET_SCANNER(obj))) + +static void +gadataset_scanner_finalize(GObject *object) +{ + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); + priv->scanner.~shared_ptr(); + G_OBJECT_CLASS(gadataset_scanner_parent_class)->finalize(object); +} + +static void +gadataset_scanner_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCANNER: + priv->scanner = + *static_cast<std::shared_ptr<arrow::dataset::Scanner> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_scanner_init(GADatasetScanner *object) +{ + auto priv = GADATASET_SCANNER_GET_PRIVATE(object); + new(&priv->scanner) std::shared_ptr<arrow::dataset::Scanner>; +} + +static void +gadataset_scanner_class_init(GADatasetScannerClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_scanner_finalize; + gobject_class->set_property = gadataset_scanner_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("scanner", + "Scanner", + "The raw std::shared<arrow::dataset::Scanner> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCANNER, spec); +} + +/** + * gadataset_scanner_to_table: + * @scanner: A #GADatasetScanner. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full) (nullable): + * A newly created #GArrowTable on success, %NULL on error. + * + * Since: 5.0.0 + */ +GArrowTable * +gadataset_scanner_to_table(GADatasetScanner *scanner, + GError **error) +{ + auto arrow_scanner = gadataset_scanner_get_raw(scanner); + auto arrow_table_result = arrow_scanner->ToTable(); + /* On success, unwrap the result and wrap the table in a new GArrowTable. */ + if (garrow::check(error, arrow_table_result, "[scanner][to-table]")) { + auto arrow_table = *arrow_table_result; + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + + +typedef struct GADatasetScannerBuilderPrivate_ { + std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder; +} GADatasetScannerBuilderPrivate; + +enum { + PROP_SCANNER_BUILDER = 1, + PROP_USE_ASYNC, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScannerBuilder, + gadataset_scanner_builder, + G_TYPE_OBJECT) + +#define GADATASET_SCANNER_BUILDER_GET_PRIVATE(obj) \ + static_cast<GADatasetScannerBuilderPrivate *>( \ + gadataset_scanner_builder_get_instance_private( \ + GADATASET_SCANNER_BUILDER(obj))) + +/* Destroys the C++ member explicitly, then chains up to the parent. */ +static void +gadataset_scanner_builder_finalize(GObject *object) +{ + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); + /* Pairs with the placement new in gadataset_scanner_builder_init(). */ + priv->scanner_builder.~shared_ptr(); + G_OBJECT_CLASS(gadataset_scanner_builder_parent_class)->finalize(object); +} + +/* Handles the "scanner-builder" and "use-async" properties. */ +static void +gadataset_scanner_builder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCANNER_BUILDER: + /* Copies the shared_ptr that the pointer value points to. */ + priv->scanner_builder = + *static_cast<std::shared_ptr<arrow::dataset::ScannerBuilder> *>( + g_value_get_pointer(value)); + break; + case PROP_USE_ASYNC: + /* NOTE(review): nullptr is passed as the GError destination and the + * return value is ignored, so a failing UseAsync() is not reported to + * the caller -- presumably because set_property has no error channel; + * confirm this is intended. */ + garrow::check(nullptr, + priv->scanner_builder->UseAsync(g_value_get_boolean(value)), + "[scanner-builder][use-async][set]"); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gadataset_scanner_builder_init(GADatasetScannerBuilder *object) 
+{ + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object); + new(&priv->scanner_builder) std::shared_ptr<arrow::dataset::ScannerBuilder>; +} + +static void +gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = gadataset_scanner_builder_finalize; + gobject_class->set_property = gadataset_scanner_builder_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("scanner-builder", + "Scanner builder", + "The raw " + "std::shared<arrow::dataset::ScannerBuilder> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCANNER_BUILDER, spec); + + arrow::dataset::ScanOptions default_options; + /** + * GADatasetScannerBuilder:use-async: + * + * Whether or not async mode is used. + * + * Since: 6.0.0 + */ + spec = g_param_spec_boolean("use-async", + "Use async", + "Whether or not async mode is used", + default_options.use_async, + static_cast<GParamFlags>(G_PARAM_WRITABLE)); + g_object_class_install_property(gobject_class, PROP_USE_ASYNC, spec); +} + +/** + * gadataset_scanner_builder_new: + * @dataset: A #GADatasetDataset to be scanned. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GADatasetScannerBuilder on success, + * %NULL on error. 
+ * + * Since: 5.0.0 + */ +GADatasetScannerBuilder * +gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error) +{ + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (garrow::check(error, + arrow_scanner_builder_result, + "[scanner-builder][new]")) { + /* The address of a local shared_ptr is passed: the "scanner-builder" + * property setter copies the shared_ptr it points to. */ + auto arrow_scanner_builder = *arrow_scanner_builder_result; + return gadataset_scanner_builder_new_raw(&arrow_scanner_builder); + } else { + return NULL; + } +} + +/** + * gadataset_scanner_builder_new_record_batch_reader: + * @reader: A #GArrowRecordBatchReader that produces record batches. + * + * Creates a builder from @reader through + * arrow::dataset::ScannerBuilder::FromRecordBatchReader(). + * + * Returns: (nullable): A newly created #GADatasetScannerBuilder. + * + * Since: 6.0.0 + */ +GADatasetScannerBuilder * +gadataset_scanner_builder_new_record_batch_reader( + GArrowRecordBatchReader *reader) +{ + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + auto arrow_scanner_builder = + arrow::dataset::ScannerBuilder::FromRecordBatchReader(arrow_reader); + return gadataset_scanner_builder_new_raw(&arrow_scanner_builder); +} + +/** + * gadataset_scanner_builder_set_filter: + * @builder: A #GADatasetScannerBuilder. + * @expression: A #GArrowExpression to filter rows with. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Passes @expression to the Filter() method of the wrapped builder. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder, + GArrowExpression *expression, + GError **error) +{ + auto arrow_builder = gadataset_scanner_builder_get_raw(builder); + auto arrow_expression = garrow_expression_get_raw(expression); + return garrow::check(error, + arrow_builder->Filter(*arrow_expression), + "[scanner-builder][filter][set]"); +} + +/** + * gadataset_scanner_builder_finish: + * @builder: A #GADatasetScannerBuilder. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full) (nullable): + * A newly created #GADatasetScanner on success, %NULL on error. + * + * Since: 5.0.0 + */ +GADatasetScanner * +gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, + GError **error) +{ + auto arrow_builder = gadataset_scanner_builder_get_raw(builder); + auto arrow_scanner_result = arrow_builder->Finish(); + /* On success, unwrap the result and wrap the scanner in a new GObject. */ + if (garrow::check(error, arrow_scanner_result, "[scanner-builder][finish]")) { + auto arrow_scanner = *arrow_scanner_result; + return gadataset_scanner_new_raw(&arrow_scanner); + } else { + return NULL; + } +} + + +G_END_DECLS + +/* Wraps *arrow_scanner in a new GADatasetScanner; the "scanner" property + * setter copies the pointed-to shared_ptr. */ +GADatasetScanner * +gadataset_scanner_new_raw( + std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner) +{ + auto scanner = + GADATASET_SCANNER(g_object_new(GADATASET_TYPE_SCANNER, + "scanner", arrow_scanner, + NULL)); + return scanner; +} + +/* Returns a copy of the shared_ptr held by the wrapper. */ +std::shared_ptr<arrow::dataset::Scanner> +gadataset_scanner_get_raw(GADatasetScanner *scanner) +{ + auto priv = GADATASET_SCANNER_GET_PRIVATE(scanner); + return priv->scanner; +} + +/* Wraps *arrow_scanner_builder in a new GADatasetScannerBuilder. */ +GADatasetScannerBuilder * +gadataset_scanner_builder_new_raw( + std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder) +{ + return GADATASET_SCANNER_BUILDER( + g_object_new(GADATASET_TYPE_SCANNER_BUILDER, + "scanner-builder", arrow_scanner_builder, + NULL)); +} + +/* Returns a copy of the shared_ptr held by the wrapper. */ +std::shared_ptr<arrow::dataset::ScannerBuilder> +gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder) +{ + auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(scanner_builder); + return priv->scanner_builder; +} diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.h b/src/arrow/c_glib/arrow-dataset-glib/scanner.h new file mode 100644 index 000000000..59da2577d --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.h @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-dataset-glib/dataset.h> +#include <arrow-dataset-glib/fragment.h> + +G_BEGIN_DECLS + +#define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, + gadataset_scanner, + GADATASET, + SCANNER, + GObject) +struct _GADatasetScannerClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTable * +gadataset_scanner_to_table(GADatasetScanner *scanner, + GError **error); + +#define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GADatasetScannerBuilder, + gadataset_scanner_builder, + GADATASET, + SCANNER_BUILDER, + GObject) +struct _GADatasetScannerBuilderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GADatasetScannerBuilder * +gadataset_scanner_builder_new(GADatasetDataset *dataset, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GADatasetScannerBuilder * +gadataset_scanner_builder_new_record_batch_reader( + GArrowRecordBatchReader *reader); + +GARROW_AVAILABLE_IN_6_0 +gboolean +gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder, + GArrowExpression *expression, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GADatasetScanner * +gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, 
+ GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp b/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp new file mode 100644 index 000000000..663ab6fc4 --- /dev/null +++ b/src/arrow/c_glib/arrow-dataset-glib/scanner.hpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/dataset/api.h> + +#include <arrow-dataset-glib/fragment.h> +#include <arrow-dataset-glib/scanner.h> + +GADatasetScanner * +gadataset_scanner_new_raw( + std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner); +std::shared_ptr<arrow::dataset::Scanner> +gadataset_scanner_get_raw(GADatasetScanner *scanner); + +GADatasetScannerBuilder * +gadataset_scanner_builder_new_raw( + std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder); +std::shared_ptr<arrow::dataset::ScannerBuilder> +gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder); diff --git a/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.h b/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.h new file mode 100644 index 000000000..6fc8f43d8 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.h @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-flight-glib/client.h> +#include <arrow-flight-glib/common.h> +#include <arrow-flight-glib/server.h> diff --git a/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.hpp b/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.hpp new file mode 100644 index 000000000..11e1fe94d --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/arrow-flight-glib.hpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-flight-glib/client.hpp> +#include <arrow-flight-glib/common.hpp> +#include <arrow-flight-glib/server.hpp> diff --git a/src/arrow/c_glib/arrow-flight-glib/client.cpp b/src/arrow/c_glib/arrow-flight-glib/client.cpp new file mode 100644 index 000000000..7610fc985 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/client.cpp @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/arrow-glib.hpp> + +#include <arrow-flight-glib/client.hpp> +#include <arrow-flight-glib/common.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: client + * @section_id: client + * @title: Client related classes + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightStreamReader is a class for reading record batches from a + * server. + * + * #GAFlightCallOptions is a class for options of each call. + * + * #GAFlightClientOptions is a class for options of each client. + * + * #GAFlightClient is a class for Apache Arrow Flight client. 
+ * + * Since: 5.0.0 + */ + +G_DEFINE_TYPE(GAFlightStreamReader, + gaflight_stream_reader, + GAFLIGHT_TYPE_RECORD_BATCH_READER) + +/* Empty: this subclass adds no instance data here. */ +static void +gaflight_stream_reader_init(GAFlightStreamReader *object) +{ +} + +/* Empty: no class members are overridden here. */ +static void +gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass) +{ +} + +typedef struct GAFlightCallOptionsPrivate_ { + arrow::flight::FlightCallOptions options; +} GAFlightCallOptionsPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCallOptions, + gaflight_call_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GAFlightCallOptionsPrivate *>( \ + gaflight_call_options_get_instance_private( \ + GAFLIGHT_CALL_OPTIONS(obj))) + +/* Destroys the embedded C++ options explicitly, then chains up. */ +static void +gaflight_call_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + + /* Pairs with the placement new in gaflight_call_options_init(). */ + priv->options.~FlightCallOptions(); + + G_OBJECT_CLASS(gaflight_call_options_parent_class)->finalize(object); +} + +/* Default-constructs the embedded FlightCallOptions in place. */ +static void +gaflight_call_options_init(GAFlightCallOptions *object) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + new(&priv->options) arrow::flight::FlightCallOptions; +} + +static void +gaflight_call_options_class_init(GAFlightCallOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_call_options_finalize; +} + +/** + * gaflight_call_options_new: + * + * Returns: The newly created options for a call. 
+ * + * Since: 5.0.0 + */ +GAFlightCallOptions * +gaflight_call_options_new(void) +{ + return static_cast<GAFlightCallOptions *>( + g_object_new(GAFLIGHT_TYPE_CALL_OPTIONS, NULL)); +} + + +typedef struct GAFlightClientOptionsPrivate_ { + arrow::flight::FlightClientOptions options; +} GAFlightClientOptionsPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClientOptions, + gaflight_client_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GAFlightClientOptionsPrivate *>( \ + gaflight_client_options_get_instance_private( \ + GAFLIGHT_CLIENT_OPTIONS(obj))) + +/* Destroys the embedded C++ options explicitly, then chains up. */ +static void +gaflight_client_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object); + + /* Pairs with the placement new in gaflight_client_options_init(). */ + priv->options.~FlightClientOptions(); + + G_OBJECT_CLASS(gaflight_client_options_parent_class)->finalize(object); +} + +static void +gaflight_client_options_init(GAFlightClientOptions *object) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object); + /* Construct in place first, then overwrite with + * FlightClientOptions::Defaults(). */ + new(&(priv->options)) arrow::flight::FlightClientOptions; + priv->options = arrow::flight::FlightClientOptions::Defaults(); +} + +static void +gaflight_client_options_class_init(GAFlightClientOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_client_options_finalize; +} + +/** + * gaflight_client_options_new: + * + * Returns: The newly created options for a client. 
+ * + * Since: 5.0.0 + */ +GAFlightClientOptions * +gaflight_client_options_new(void) +{ + return static_cast<GAFlightClientOptions *>( + g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL)); +} + + +typedef struct GAFlightClientPrivate_ { + arrow::flight::FlightClient *client; +} GAFlightClientPrivate; + +enum { + PROP_CLIENT = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClient, + gaflight_client, + G_TYPE_OBJECT) + +#define GAFLIGHT_CLIENT_GET_PRIVATE(obj) \ + static_cast<GAFlightClientPrivate *>( \ + gaflight_client_get_instance_private( \ + GAFLIGHT_CLIENT(obj))) + +/* Deletes the raw client stored by the "client" property, then chains up. */ +static void +gaflight_client_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object); + + /* The wrapper owns the pointer it was constructed with. */ + delete priv->client; + + G_OBJECT_CLASS(gaflight_client_parent_class)->finalize(object); +} + +/* Handles the write-only, construct-only "client" property. */ +static void +gaflight_client_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + /* Stores the raw pointer itself; no copy is made. */ + priv->client = + static_cast<arrow::flight::FlightClient *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_client_init(GAFlightClient *object) +{ + /* Nothing to do: priv->client is assigned by the "client" property. */ +} + +static void +gaflight_client_class_init(GAFlightClientClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_client_finalize; + gobject_class->set_property = gaflight_client_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("client", + "Client", + "The raw arrow::flight::FlightClient *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CLIENT, spec); +} + +/** + * gaflight_client_new: + * @location: A #GAFlightLocation to be connected. + * @options: (nullable): A #GAFlightClientOptions. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable): The newly created client, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightClient * +gaflight_client_new(GAFlightLocation *location, + GAFlightClientOptions *options, + GError **error) +{ + const auto flight_location = gaflight_location_get_raw(location); + std::unique_ptr<arrow::flight::FlightClient> flight_client; + arrow::Status status; + /* Use the Connect() overload without options when @options is NULL. */ + if (options) { + const auto flight_options = gaflight_client_options_get_raw(options); + status = arrow::flight::FlightClient::Connect(*flight_location, + *flight_options, + &flight_client); + } else { + status = arrow::flight::FlightClient::Connect(*flight_location, + &flight_client); + } + if (garrow::check(error, status, "[flight-client][new]")) { + /* release(): ownership moves to the wrapper, which deletes the client + * in gaflight_client_finalize(). */ + return gaflight_client_new_raw(flight_client.release()); + } else { + return NULL; + } +} + +/** + * gaflight_client_list_flights: + * @client: A #GAFlightClient. + * @criteria: (nullable): A #GAFlightCriteria. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (element-type GAFlightInfo) (transfer full): + * The returned list of #GAFlightInfo on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GList * +gaflight_client_list_flights(GAFlightClient *client, + GAFlightCriteria *criteria, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + /* Fall back to default-constructed criteria/options when the + * corresponding argument is NULL. */ + arrow::flight::Criteria flight_default_criteria; + auto flight_criteria = &flight_default_criteria; + if (criteria) { + flight_criteria = gaflight_criteria_get_raw(criteria); + } + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + std::unique_ptr<arrow::flight::FlightListing> flight_listing; + auto status = flight_client->ListFlights(*flight_options, + *flight_criteria, + &flight_listing); + if (!garrow::check(error, + status, + "[flight-client][list-flights]")) { + return NULL; + } + /* NOTE: listing starts as NULL, so an empty listing also returns NULL; + * callers need @error to tell "no flights" from a failure. */ + GList *listing = NULL; + std::unique_ptr<arrow::flight::FlightInfo> flight_info; + while (true) { + status = flight_listing->Next(&flight_info); + if (!garrow::check(error, + status, + "[flight-client][list-flights]")) { + /* Drop the wrappers collected so far before reporting the error. */ + g_list_free_full(listing, g_object_unref); + return NULL; + } + /* The loop ends when Next() leaves flight_info empty. */ + if (!flight_info) { + break; + } + auto info = gaflight_info_new_raw(flight_info.release()); + /* Prepend now, reverse once at the end. */ + listing = g_list_prepend(listing, info); + } + return g_list_reverse(listing); +} + +/** + * gaflight_client_do_get: + * @client: A #GAFlightClient. + * @ticket: A #GAFlightTicket. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * The #GAFlightStreamReader to read record batches from the server + * on success, %NULL on error. 
+ * + * Since: 6.0.0 + */ +GAFlightStreamReader * +gaflight_client_do_get(GAFlightClient *client, + GAFlightTicket *ticket, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + const auto flight_ticket = gaflight_ticket_get_raw(ticket); + /* Fall back to default-constructed call options when @options is NULL. */ + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + std::unique_ptr<arrow::flight::FlightStreamReader> flight_reader; + auto status = flight_client->DoGet(*flight_options, + *flight_ticket, + &flight_reader); + if (garrow::check(error, + status, + "[flight-client][do-get]")) { + /* The released raw reader is handed to the wrapper's "reader" property. */ + return gaflight_stream_reader_new_raw(flight_reader.release()); + } else { + return NULL; + } +} + + +G_END_DECLS + + +/* Wraps a raw FlightStreamReader pointer in a new GAFlightStreamReader. */ +GAFlightStreamReader * +gaflight_stream_reader_new_raw( + arrow::flight::FlightStreamReader *flight_reader) +{ + return GAFLIGHT_STREAM_READER( + g_object_new(GAFLIGHT_TYPE_STREAM_READER, + "reader", flight_reader, + NULL)); +} + +/* Returns a pointer to the options embedded in the wrapper (not a copy). */ +arrow::flight::FlightCallOptions * +gaflight_call_options_get_raw(GAFlightCallOptions *options) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +/* Returns a pointer to the options embedded in the wrapper (not a copy). */ +arrow::flight::FlightClientOptions * +gaflight_client_options_get_raw(GAFlightClientOptions *options) +{ + auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +/* Returns the raw client pointer stored in the wrapper. */ +arrow::flight::FlightClient * +gaflight_client_get_raw(GAFlightClient *client) +{ + auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(client); + return priv->client; +} + +/* Wraps a raw FlightClient pointer; gaflight_client_finalize() deletes it. */ +GAFlightClient * +gaflight_client_new_raw(arrow::flight::FlightClient *flight_client) +{ + return GAFLIGHT_CLIENT(g_object_new(GAFLIGHT_TYPE_CLIENT, + "client", flight_client, + NULL)); +} diff --git a/src/arrow/c_glib/arrow-flight-glib/client.h b/src/arrow/c_glib/arrow-flight-glib/client.h new file mode 100644 index 000000000..bc2971161 --- /dev/null +++ 
b/src/arrow/c_glib/arrow-flight-glib/client.h @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-flight-glib/common.h> + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_STREAM_READER \ + (gaflight_stream_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader, + gaflight_stream_reader, + GAFLIGHT, + STREAM_READER, + GAFlightRecordBatchReader) +struct _GAFlightStreamReaderClass +{ + GAFlightRecordBatchReaderClass parent_class; +}; + + +#define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCallOptions, + gaflight_call_options, + GAFLIGHT, + CALL_OPTIONS, + GObject) +struct _GAFlightCallOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCallOptions * +gaflight_call_options_new(void); + + +#define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightClientOptions, + gaflight_client_options, + GAFLIGHT, + CLIENT_OPTIONS, + GObject) +struct _GAFlightClientOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightClientOptions * +gaflight_client_options_new(void); + + +#define GAFLIGHT_TYPE_CLIENT 
(gaflight_client_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightClient, + gaflight_client, + GAFLIGHT, + CLIENT, + GObject) +struct _GAFlightClientClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightClient * +gaflight_client_new(GAFlightLocation *location, + GAFlightClientOptions *options, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_client_list_flights(GAFlightClient *client, + GAFlightCriteria *criteria, + GAFlightCallOptions *options, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GAFlightStreamReader * +gaflight_client_do_get(GAFlightClient *client, + GAFlightTicket *ticket, + GAFlightCallOptions *options, + GError **error); + + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-flight-glib/client.hpp b/src/arrow/c_glib/arrow-flight-glib/client.hpp new file mode 100644 index 000000000..1e68761b7 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/client.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/flight/api.h> + +#include <arrow-flight-glib/client.h> + + +GAFlightStreamReader * +gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader); + +arrow::flight::FlightCallOptions * +gaflight_call_options_get_raw(GAFlightCallOptions *options); + +arrow::flight::FlightClientOptions * +gaflight_client_options_get_raw(GAFlightClientOptions *options); + +arrow::flight::FlightClient * +gaflight_client_get_raw(GAFlightClient *client); +GAFlightClient * +gaflight_client_new_raw(arrow::flight::FlightClient *flight_client); diff --git a/src/arrow/c_glib/arrow-flight-glib/common.cpp b/src/arrow/c_glib/arrow-flight-glib/common.cpp new file mode 100644 index 000000000..81b00f7a3 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/common.cpp @@ -0,0 +1,1467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/arrow-glib.hpp> + +#include <arrow-flight-glib/common.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: common + * @section_id: common + * @title: Classes both for client and server + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightCriteria is a class for criteria. + * + * #GAFlightLocation is a class for location. 
+ * + * #GAFlightDescriptor is a base class for all descriptor classes such + * as #GAFlightPathDescriptor. + * + * #GAFlightPathDescriptor is a class for path descriptor. + * + * #GAFlightCommandDescriptor is a class for command descriptor. + * + * #GAFlightTicket is a class for ticket. + * + * #GAFlightEndpoint is a class for endpoint. + * + * #GAFlightInfo is a class for flight information. + * + * #GAFlightStreamChunk is a class for a chunk in stream. + * + * #GAFlightRecordBatchReader is a class for reading record batches. + * + * Since: 5.0.0 + */ + +typedef struct GAFlightCriteriaPrivate_ { + arrow::flight::Criteria criteria; + GBytes *expression; +} GAFlightCriteriaPrivate; + +enum { + PROP_EXPRESSION = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCriteria, + gaflight_criteria, + G_TYPE_OBJECT) + +#define GAFLIGHT_CRITERIA_GET_PRIVATE(obj) \ + static_cast<GAFlightCriteriaPrivate *>( \ + gaflight_criteria_get_instance_private( \ + GAFLIGHT_CRITERIA(obj))) + +/* Drops the reference on the stored GBytes, then chains up. */ +static void +gaflight_criteria_dispose(GObject *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + /* Reset to NULL so a repeated dispose does not unref twice. */ + if (priv->expression) { + g_bytes_unref(priv->expression); + priv->expression = NULL; + } + + G_OBJECT_CLASS(gaflight_criteria_parent_class)->dispose(object); +} + +/* Destroys the embedded C++ Criteria explicitly, then chains up. */ +static void +gaflight_criteria_finalize(GObject *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + priv->criteria.~Criteria(); + + G_OBJECT_CLASS(gaflight_criteria_parent_class)->finalize(object); +} + +/* Stores the "expression" GBytes and mirrors its bytes into + * criteria.expression. */ +static void +gaflight_criteria_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + if (priv->expression) { + g_bytes_unref(priv->expression); + } + priv->expression = static_cast<GBytes *>(g_value_dup_boxed(value)); + { + /* A NULL expression (e.g. gaflight_criteria_new(NULL)) must not reach + * g_bytes_get_data(): that call would fail its precondition and leave + * size uninitialized. NULL or empty bytes clear the expression. */ + gsize size = 0; + gconstpointer data = NULL; + if (priv->expression) { + data = g_bytes_get_data(priv->expression, &size); + } + if (data) { + priv->criteria.expression.assign(static_cast<const char *>(data), + size); + } else { + priv->criteria.expression.clear(); + } + } + 
break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_criteria_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + g_value_set_boxed(value, priv->expression); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_criteria_init(GAFlightCriteria *object) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object); + new(&priv->criteria) arrow::flight::Criteria; +} + +static void +gaflight_criteria_class_init(GAFlightCriteriaClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_criteria_dispose; + gobject_class->finalize = gaflight_criteria_finalize; + gobject_class->set_property = gaflight_criteria_set_property; + gobject_class->get_property = gaflight_criteria_get_property; + + GParamSpec *spec; + /** + * GAFlightCriteria:expression: + * + * Opaque criteria expression, dependent on server implementation. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boxed("expression", + "Expression", + "Opaque criteria expression, " + "dependent on server implementation", + G_TYPE_BYTES, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec); +} + +/** + * gaflight_criteria_new: + * @expression: A #GBytes. + * + * Returns: The newly created #GAFlightCriteria, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GAFlightCriteria * +gaflight_criteria_new(GBytes *expression) +{ + return GAFLIGHT_CRITERIA( + g_object_new(GAFLIGHT_TYPE_CRITERIA, + "expression", expression, + NULL)); +} + + +typedef struct GAFlightLocationPrivate_ { + arrow::flight::Location location; +} GAFlightLocationPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightLocation, + gaflight_location, + G_TYPE_OBJECT) + +#define GAFLIGHT_LOCATION_GET_PRIVATE(obj) \ + static_cast<GAFlightLocationPrivate *>( \ + gaflight_location_get_instance_private( \ + GAFLIGHT_LOCATION(obj))) + +static void +gaflight_location_finalize(GObject *object) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object); + + priv->location.~Location(); + + G_OBJECT_CLASS(gaflight_location_parent_class)->finalize(object); +} + +static void +gaflight_location_init(GAFlightLocation *object) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object); + new(&priv->location) arrow::flight::Location; +} + +static void +gaflight_location_class_init(GAFlightLocationClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_location_finalize; +} + +/** + * gaflight_location_new: + * @uri: An URI to specify location. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): The newly created location, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightLocation * +gaflight_location_new(const gchar *uri, + GError **error) +{ + auto location = GAFLIGHT_LOCATION(g_object_new(GAFLIGHT_TYPE_LOCATION, NULL)); + auto flight_location = gaflight_location_get_raw(location); + if (garrow::check(error, + arrow::flight::Location::Parse(uri, flight_location), + "[flight-location][new]")) { + return location; + } else { + g_object_unref(location); + return NULL; + } +} + +/** + * gaflight_location_to_string: + * @location: A #GAFlightLocation. + * + * Returns: A representation of this URI as a string. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 5.0.0 + */ +gchar * +gaflight_location_to_string(GAFlightLocation *location) +{ + const auto flight_location = gaflight_location_get_raw(location); + return g_strdup(flight_location->ToString().c_str()); +} + +/** + * gaflight_location_get_scheme: + * @location: A #GAFlightLocation. + * + * Returns: The scheme of this URI. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 5.0.0 + */ +gchar * +gaflight_location_get_scheme(GAFlightLocation *location) +{ + const auto flight_location = gaflight_location_get_raw(location); + return g_strdup(flight_location->scheme().c_str()); +} + +/** + * gaflight_location_equal: + * @location: A #GAFlightLocation. + * @other_location: A #GAFlightLocation to be compared. + * + * Returns: %TRUE if both of them represents the same URI, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gaflight_location_equal(GAFlightLocation *location, + GAFlightLocation *other_location) +{ + const auto flight_location = gaflight_location_get_raw(location); + const auto flight_other_location = gaflight_location_get_raw(other_location); + return flight_location->Equals(*flight_other_location); +} + + +typedef struct GAFlightDescriptorPrivate_ { + arrow::flight::FlightDescriptor descriptor; +} GAFlightDescriptorPrivate; + +enum { + PROP_DESCRIPTOR = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightDescriptor, + gaflight_descriptor, + G_TYPE_OBJECT) + +#define GAFLIGHT_DESCRIPTOR_GET_PRIVATE(obj) \ + static_cast<GAFlightDescriptorPrivate *>( \ + gaflight_descriptor_get_instance_private( \ + GAFLIGHT_DESCRIPTOR(obj))) + +static void +gaflight_descriptor_finalize(GObject *object) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object); + + priv->descriptor.~FlightDescriptor(); + + G_OBJECT_CLASS(gaflight_descriptor_parent_class)->finalize(object); +} + +static void +gaflight_descriptor_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DESCRIPTOR: + priv->descriptor = *static_cast<arrow::flight::FlightDescriptor *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_descriptor_init(GAFlightDescriptor *object) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object); + new(&priv->descriptor) arrow::flight::FlightDescriptor; +} + +static void +gaflight_descriptor_class_init(GAFlightDescriptorClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_descriptor_finalize; + gobject_class->set_property = gaflight_descriptor_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("descriptor", + "Descriptor", + "The raw arrow::flight::FlightDescriptor", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec); +} + +/** + * gaflight_descriptor_to_string: + * @descriptor: A #GAFlightDescriptor. + * + * Returns: A descriptor as a string. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 5.0.0 + */ +gchar * +gaflight_descriptor_to_string(GAFlightDescriptor *descriptor) +{ + auto flight_descriptor = gaflight_descriptor_get_raw(descriptor); + return g_strdup(flight_descriptor->ToString().c_str()); +} + +/** + * gaflight_descriptor_equal: + * @descriptor: A #GAFlightDescriptor. + * @other_descriptor: A #GAFlightDescriptor to be compared. + * + * Returns: %TRUE if both of them represents the same descriptor, + * %FALSE otherwise. 
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_descriptor_equal(GAFlightDescriptor *descriptor,
+                          GAFlightDescriptor *other_descriptor)
+{
+  /* Compare the wrapped C++ descriptors. */
+  const auto lhs = gaflight_descriptor_get_raw(descriptor);
+  const auto rhs = gaflight_descriptor_get_raw(other_descriptor);
+  return lhs->Equals(*rhs);
+}
+
+
+G_DEFINE_TYPE(GAFlightPathDescriptor,
+              gaflight_path_descriptor,
+              GAFLIGHT_TYPE_DESCRIPTOR)
+
+/* No per-instance state beyond what the parent class holds. */
+static void
+gaflight_path_descriptor_init(GAFlightPathDescriptor *object)
+{
+}
+
+/* Nothing to override. */
+static void
+gaflight_path_descriptor_class_init(GAFlightPathDescriptorClass *klass)
+{
+}
+
+/**
+ * gaflight_path_descriptor_new:
+ * @paths: (array length=n_paths): List of paths identifying a
+ *   particular dataset.
+ * @n_paths: The number of @paths.
+ *
+ * Returns: The newly created #GAFlightPathDescriptor.
+ *
+ * Since: 5.0.0
+ */
+GAFlightPathDescriptor *
+gaflight_path_descriptor_new(const gchar **paths,
+                             gsize n_paths)
+{
+  /* Copy the C strings into a std::vector for the C++ factory. */
+  std::vector<std::string> path_components;
+  gsize index = 0;
+  while (index < n_paths) {
+    path_components.push_back(paths[index]);
+    ++index;
+  }
+  auto raw_descriptor =
+    arrow::flight::FlightDescriptor::Path(path_components);
+  auto descriptor = gaflight_descriptor_new_raw(&raw_descriptor);
+  return GAFLIGHT_PATH_DESCRIPTOR(descriptor);
+}
+
+/**
+ * gaflight_path_descriptor_get_paths:
+ * @descriptor: A #GAFlightPathDescriptor.
+ *
+ * Returns: (nullable) (array zero-terminated=1) (transfer full):
+ *   The paths in this descriptor.
+ *
+ *   It must be freed with g_strfreev() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar **
+gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor)
+{
+  const auto raw_descriptor =
+    gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor));
+  const auto &raw_paths = raw_descriptor->path;
+  /* An empty path list is reported as NULL, not as an empty array. */
+  if (raw_paths.empty()) {
+    return NULL;
+  }
+
+  /* One extra slot for the terminating NULL expected by g_strfreev(). */
+  const gsize n_paths = raw_paths.size();
+  auto copied_paths = g_new(gchar *, n_paths + 1);
+  for (gsize index = 0; index < n_paths; ++index) {
+    copied_paths[index] = g_strdup(raw_paths[index].c_str());
+  }
+  copied_paths[n_paths] = NULL;
+  return copied_paths;
+}
+
+
+G_DEFINE_TYPE(GAFlightCommandDescriptor,
+              gaflight_command_descriptor,
+              GAFLIGHT_TYPE_DESCRIPTOR)
+
+/* No per-instance state beyond what the parent class holds. */
+static void
+gaflight_command_descriptor_init(GAFlightCommandDescriptor *object)
+{
+}
+
+/* Nothing to override. */
+static void
+gaflight_command_descriptor_class_init(GAFlightCommandDescriptorClass *klass)
+{
+}
+
+/**
+ * gaflight_command_descriptor_new:
+ * @command: Opaque value used to express a command.
+ *
+ * Returns: The newly created #GAFlightCommandDescriptor.
+ *
+ * Since: 5.0.0
+ */
+GAFlightCommandDescriptor *
+gaflight_command_descriptor_new(const gchar *command)
+{
+  auto raw_descriptor = arrow::flight::FlightDescriptor::Command(command);
+  auto descriptor = gaflight_descriptor_new_raw(&raw_descriptor);
+  return GAFLIGHT_COMMAND_DESCRIPTOR(descriptor);
+}
+
+/**
+ * gaflight_command_descriptor_get_command:
+ * @descriptor: A #GAFlightCommandDescriptor.
+ *
+ * Returns: The opaque value used to express a command.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ * + * Since: 5.0.0 + */ +gchar * +gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor) +{ + const auto flight_descriptor = + gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor)); + const auto &flight_command = flight_descriptor->cmd; + return g_strdup(flight_command.c_str()); +} + + +typedef struct GAFlightTicketPrivate_ { + arrow::flight::Ticket ticket; + GBytes *data; +} GAFlightTicketPrivate; + +enum { + PROP_DATA = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightTicket, + gaflight_ticket, + G_TYPE_OBJECT) + +#define GAFLIGHT_TICKET_GET_PRIVATE(obj) \ + static_cast<GAFlightTicketPrivate *>( \ + gaflight_ticket_get_instance_private( \ + GAFLIGHT_TICKET(obj))) + +static void +gaflight_ticket_dispose(GObject *object) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object); + + if (priv->data) { + g_bytes_unref(priv->data); + priv->data = NULL; + } + + G_OBJECT_CLASS(gaflight_ticket_parent_class)->dispose(object); +} + +static void +gaflight_ticket_finalize(GObject *object) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object); + + priv->ticket.~Ticket(); + + G_OBJECT_CLASS(gaflight_ticket_parent_class)->finalize(object); +} + +static void +gaflight_ticket_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATA: + if (priv->data) { + g_bytes_unref(priv->data); + } + priv->data = static_cast<GBytes *>(g_value_dup_boxed(value)); + { + gsize size; + auto data = g_bytes_get_data(priv->data, &size); + priv->ticket.ticket.assign(static_cast<const char *>(data), + size); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_ticket_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATA: + g_value_set_boxed(value, priv->data); 
+ break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_ticket_init(GAFlightTicket *object) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object); + new(&priv->ticket) arrow::flight::Ticket; +} + +static void +gaflight_ticket_class_init(GAFlightTicketClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_ticket_dispose; + gobject_class->finalize = gaflight_ticket_finalize; + gobject_class->set_property = gaflight_ticket_set_property; + gobject_class->get_property = gaflight_ticket_get_property; + + GParamSpec *spec; + /** + * GAFlightTicket:data: + * + * Opaque identifier or credential to use when requesting a data + * stream with the DoGet RPC. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boxed("data", + "Data", + "Opaque identifier or credential to use " + "when requesting a data stream with the DoGet RPC", + G_TYPE_BYTES, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_DATA, spec); +} + +/** + * gaflight_ticket_new: + * @data: A #GBytes. + * + * Returns: The newly created #GAFlightTicket, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightTicket * +gaflight_ticket_new(GBytes *data) +{ + return GAFLIGHT_TICKET( + g_object_new(GAFLIGHT_TYPE_TICKET, + "data", data, + NULL)); +} + +/** + * gaflight_ticket_equal: + * @ticket: A #GAFlightTicket. + * @other_ticket: A #GAFlightTicket to be compared. + * + * Returns: %TRUE if both of them represents the same ticket, %FALSE otherwise. 
+ * + * Since: 5.0.0 + */ +gboolean +gaflight_ticket_equal(GAFlightTicket *ticket, + GAFlightTicket *other_ticket) +{ + const auto flight_ticket = gaflight_ticket_get_raw(ticket); + const auto flight_other_ticket = gaflight_ticket_get_raw(other_ticket); + return flight_ticket->Equals(*flight_other_ticket); +} + + +typedef struct GAFlightEndpointPrivate_ { + arrow::flight::FlightEndpoint endpoint; + GAFlightTicket *ticket; + GList *locations; +} GAFlightEndpointPrivate; + +enum { + PROP_TICKET = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightEndpoint, + gaflight_endpoint, + G_TYPE_OBJECT) + +#define GAFLIGHT_ENDPOINT_GET_PRIVATE(obj) \ + static_cast<GAFlightEndpointPrivate *>( \ + gaflight_endpoint_get_instance_private( \ + GAFLIGHT_ENDPOINT(obj))) + +static void +gaflight_endpoint_dispose(GObject *object) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object); + + if (priv->ticket) { + g_object_unref(priv->ticket); + priv->ticket = NULL; + } + + if (priv->locations) { + g_list_free_full(priv->locations, g_object_unref); + priv->locations = NULL; + } + + G_OBJECT_CLASS(gaflight_endpoint_parent_class)->dispose(object); +} + +static void +gaflight_endpoint_finalize(GObject *object) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object); + + priv->endpoint.~FlightEndpoint(); + + G_OBJECT_CLASS(gaflight_endpoint_parent_class)->finalize(object); +} + +static void +gaflight_endpoint_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TICKET: + g_value_set_object(value, priv->ticket); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_endpoint_init(GAFlightEndpoint *object) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object); + new(&priv->endpoint) arrow::flight::FlightEndpoint; +} + +static void +gaflight_endpoint_class_init(GAFlightEndpointClass *klass) +{ + auto gobject_class = 
G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_endpoint_dispose; + gobject_class->finalize = gaflight_endpoint_finalize; + gobject_class->get_property = gaflight_endpoint_get_property; + + GParamSpec *spec; + /** + * GAFlightEndpoint:ticket: + * + * Opaque ticket identify; use with DoGet RPC. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("ticket", + "Ticket", + "Opaque ticket identify; use with DoGet RPC", + GAFLIGHT_TYPE_TICKET, + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_TICKET, spec); +} + +/** + * gaflight_endpoint_new: + * @ticket: A #GAFlightTicket. + * @locations: (element-type GAFlightLocation): A list of #GAFlightLocation. + * + * Returns: The newly created #GAFlightEndpoint, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightEndpoint * +gaflight_endpoint_new(GAFlightTicket *ticket, + GList *locations) +{ + auto endpoint = gaflight_endpoint_new_raw(nullptr, ticket); + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint); + for (auto node = locations; node; node = node->next) { + auto location = GAFLIGHT_LOCATION(node->data); + priv->endpoint.locations.push_back(*gaflight_location_get_raw(location)); + } + return endpoint; +} + +/** + * gaflight_endpoint_equal: + * @endpoint: A #GAFlightEndpoint. + * @other_endpoint: A #GAFlightEndpoint to be compared. + * + * Returns: %TRUE if both of them represents the same endpoint, + * %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +gaflight_endpoint_equal(GAFlightEndpoint *endpoint, + GAFlightEndpoint *other_endpoint) +{ + const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint); + const auto flight_other_endpoint = gaflight_endpoint_get_raw(other_endpoint); + return flight_endpoint->Equals(*flight_other_endpoint); +} + +/** + * gaflight_endpoint_get_locations: + * @endpoint: A #GAFlightEndpoint. + * + * Returns: (nullable) (element-type GAFlightLocation) (transfer full): + * The locations in this endpoint. 
+ * + * It must be freed with g_list_free() and g_object_unref() when no + * longer needed. You can use `g_list_free_full(locations, + * g_object_unref)`. + * + * Since: 5.0.0 + */ +GList * +gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint) +{ + const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint); + GList *locations = NULL; + for (const auto &flight_location : flight_endpoint->locations) { + auto location = gaflight_location_new(flight_location.ToString().c_str(), + nullptr); + locations = g_list_prepend(locations, location); + } + return g_list_reverse(locations); +} + + +typedef struct GAFlightInfoPrivate_ { + arrow::flight::FlightInfo info; +} GAFlightInfoPrivate; + +enum { + PROP_INFO = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightInfo, + gaflight_info, + G_TYPE_OBJECT) + +#define GAFLIGHT_INFO_GET_PRIVATE(obj) \ + static_cast<GAFlightInfoPrivate *>( \ + gaflight_info_get_instance_private( \ + GAFLIGHT_INFO(obj))) + +static void +gaflight_info_finalize(GObject *object) +{ + auto priv = GAFLIGHT_INFO_GET_PRIVATE(object); + + priv->info.~FlightInfo(); + + G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); +} + +static void +gaflight_info_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_INFO_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INFO: + { + auto info = + static_cast<arrow::flight::FlightInfo *>(g_value_get_pointer(value)); + new(&(priv->info)) arrow::flight::FlightInfo(*info); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_info_init(GAFlightInfo *object) +{ +} + +static void +gaflight_info_class_init(GAFlightInfoClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_info_finalize; + gobject_class->set_property = gaflight_info_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("info", + "Info", + "The raw 
arrow::flight::FlightInfo *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_INFO, spec); +} + +/** + * gaflight_info_new: + * @schema: A #GArrowSchema. + * @descriptor: A #GAFlightDescriptor. + * @endpoints: (element-type GAFlightEndpoint): A list of #GAFlightEndpoint. + * @total_records: The number of total records. + * @total_bytes: The number of total bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): The newly created #GAFlightInfo, %NULL on error. + * + * Since: 5.0.0 + */ +GAFlightInfo * +gaflight_info_new(GArrowSchema *schema, + GAFlightDescriptor *descriptor, + GList *endpoints, + gint64 total_records, + gint64 total_bytes, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto flight_descriptor = gaflight_descriptor_get_raw(descriptor); + std::vector<arrow::flight::FlightEndpoint> flight_endpoints; + for (auto node = endpoints; node; node = node->next) { + auto endpoint = GAFLIGHT_ENDPOINT(node->data); + flight_endpoints.push_back(*gaflight_endpoint_get_raw(endpoint)); + } + auto flight_info_result = + arrow::flight::FlightInfo::Make(*arrow_schema, + *flight_descriptor, + flight_endpoints, + total_records, + total_bytes); + if (!garrow::check(error, + flight_info_result, + "[flight-info][new]")) { + return NULL; + } + return gaflight_info_new_raw(&(*flight_info_result)); +} + +/** + * gaflight_info_equal: + * @info: A #GAFlightInfo. + * @other_info: A #GAFlightInfo to be compared. + * + * Returns: %TRUE if both of them represents the same information, + * %FALSE otherwise. 
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_info_equal(GAFlightInfo *info,
+                    GAFlightInfo *other_info)
+{
+  const auto lhs = gaflight_info_get_raw(info);
+  const auto rhs = gaflight_info_get_raw(other_info);
+  /* Field-by-field comparison, stopping at the first difference. */
+  if (!(lhs->serialized_schema() == rhs->serialized_schema())) {
+    return FALSE;
+  }
+  if (!(lhs->descriptor() == rhs->descriptor())) {
+    return FALSE;
+  }
+  if (!(lhs->endpoints() == rhs->endpoints())) {
+    return FALSE;
+  }
+  if (!(lhs->total_records() == rhs->total_records())) {
+    return FALSE;
+  }
+  return lhs->total_bytes() == rhs->total_bytes();
+}
+
+/**
+ * gaflight_info_get_schema:
+ * @info: A #GAFlightInfo.
+ * @options: (nullable): A #GArrowReadOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): Deserialized #GArrowSchema, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowSchema *
+gaflight_info_get_schema(GAFlightInfo *info,
+                         GArrowReadOptions *options,
+                         GError **error)
+{
+  const auto raw_info = gaflight_info_get_raw(info);
+  std::shared_ptr<arrow::Schema> schema;
+  arrow::Status status;
+  if (!options) {
+    /* Without options, a throwaway dictionary memo is used. */
+    arrow::ipc::DictionaryMemo scratch_memo;
+    status = raw_info->GetSchema(&scratch_memo, &schema);
+  } else {
+    auto options_memo = garrow_read_options_get_dictionary_memo_raw(options);
+    status = raw_info->GetSchema(options_memo, &schema);
+  }
+  if (!garrow::check(error, status, "[flight-info][get-schema]")) {
+    return NULL;
+  }
+  return garrow_schema_new_raw(&schema);
+}
+
+/**
+ * gaflight_info_get_descriptor:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: (transfer full): The #GAFlightDescriptor of the information.
+ *
+ * Since: 5.0.0
+ */
+GAFlightDescriptor *
+gaflight_info_get_descriptor(GAFlightInfo *info)
+{
+  const auto raw_info = gaflight_info_get_raw(info);
+  const auto &raw_descriptor = raw_info->descriptor();
+  return gaflight_descriptor_new_raw(&raw_descriptor);
+}
+
+/**
+ * gaflight_info_get_endpoints:
+ * @info: A #GAFlightInfo.
+ * + * Returns: (element-type GAFlightEndpoint) (transfer full): + * The list of #GAFlightEndpoint of the information. + * + * Since: 5.0.0 + */ +GList * +gaflight_info_get_endpoints(GAFlightInfo *info) +{ + const auto flight_info = gaflight_info_get_raw(info); + GList *endpoints = NULL; + for (const auto &flight_endpoint : flight_info->endpoints()) { + auto endpoint = gaflight_endpoint_new_raw(&flight_endpoint, nullptr); + endpoints = g_list_prepend(endpoints, endpoint); + } + return g_list_reverse(endpoints); +} + +/** + * gaflight_info_get_total_records: + * @info: A #GAFlightInfo. + * + * Returns: The number of total records of the information. + * + * Since: 5.0.0 + */ +gint64 +gaflight_info_get_total_records(GAFlightInfo *info) +{ + const auto flight_info = gaflight_info_get_raw(info); + return flight_info->total_records(); +} + +/** + * gaflight_info_get_total_bytes: + * @info: A #GAFlightInfo. + * + * Returns: The number of total bytes of the information. + * + * Since: 5.0.0 + */ +gint64 +gaflight_info_get_total_bytes(GAFlightInfo *info) +{ + const auto flight_info = gaflight_info_get_raw(info); + return flight_info->total_bytes(); +} + +typedef struct GAFlightStreamChunkPrivate_ { + arrow::flight::FlightStreamChunk chunk; +} GAFlightStreamChunkPrivate; + +enum { + PROP_CHUNK = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightStreamChunk, + gaflight_stream_chunk, + G_TYPE_OBJECT) + +#define GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(obj) \ + static_cast<GAFlightStreamChunkPrivate *>( \ + gaflight_stream_chunk_get_instance_private( \ + GAFLIGHT_STREAM_CHUNK(obj))) + +static void +gaflight_stream_chunk_finalize(GObject *object) +{ + auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object); + + priv->chunk.~FlightStreamChunk(); + + G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); +} + +static void +gaflight_stream_chunk_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CHUNK: + priv->chunk = + *static_cast<arrow::flight::FlightStreamChunk *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_stream_chunk_init(GAFlightStreamChunk *object) +{ +} + +static void +gaflight_stream_chunk_class_init(GAFlightStreamChunkClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_stream_chunk_finalize; + gobject_class->set_property = gaflight_stream_chunk_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("chunk", + "Stream chunk", + "The raw arrow::flight::FlightStreamChunk *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CHUNK, spec); +} + +/** + * gaflight_stream_chunk_get_data: + * @chunk: A #GAFlightStreamChunk. + * + * Returns: (transfer full): The data of the chunk. + * + * Since: 6.0.0 + */ +GArrowRecordBatch * +gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk) +{ + auto flight_chunk = gaflight_stream_chunk_get_raw(chunk); + return garrow_record_batch_new_raw(&(flight_chunk->data)); +} + +/** + * gaflight_stream_chunk_get_metadata: + * @chunk: A #GAFlightStreamChunk. + * + * Returns: (nullable) (transfer full): The metadata of the chunk. + * + * The metadata may be NULL. 
+ * + * Since: 6.0.0 + */ +GArrowBuffer * +gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk) +{ + auto flight_chunk = gaflight_stream_chunk_get_raw(chunk); + if (flight_chunk->app_metadata) { + return garrow_buffer_new_raw(&(flight_chunk->app_metadata)); + } else { + return NULL; + } +} + + +typedef struct GAFlightRecordBatchReaderPrivate_ { + arrow::flight::MetadataRecordBatchReader *reader; +} GAFlightRecordBatchReaderPrivate; + +enum { + PROP_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader, + gaflight_record_batch_reader, + G_TYPE_OBJECT) + +#define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj) \ + static_cast<GAFlightRecordBatchReaderPrivate *>( \ + gaflight_record_batch_reader_get_instance_private( \ + GAFLIGHT_RECORD_BATCH_READER(obj))) + +static void +gaflight_record_batch_reader_finalize(GObject *object) +{ + auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object); + + delete priv->reader; + + G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); +} + +static void +gaflight_record_batch_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + priv->reader = + static_cast<arrow::flight::MetadataRecordBatchReader *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_record_batch_reader_init(GAFlightRecordBatchReader *object) +{ +} + +static void +gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_record_batch_reader_finalize; + gobject_class->set_property = gaflight_record_batch_reader_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("reader", + "Reader", + "The raw arrow::flight::MetadataRecordBatchReader *", + 
static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_READER, spec); +} + +/** + * gaflight_record_batch_reader_read_next: + * @reader: A #GAFlightRecordBatchReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The next chunk on success, %NULL on end + * of stream, %NULL on error. + * + * Since: 6.0.0 + */ +GAFlightStreamChunk * +gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, + GError **error) +{ + auto flight_reader = gaflight_record_batch_reader_get_raw(reader); + arrow::flight::FlightStreamChunk flight_chunk; + auto status = flight_reader->Next(&flight_chunk); + if (garrow::check(error, status, "[flight-record-batch-reader][read-next]")) { + if (flight_chunk.data) { + return gaflight_stream_chunk_new_raw(&flight_chunk); + } else { + return NULL; + } + } else { + return NULL; + } +} + +/** + * gaflight_record_batch_reader_read_all: + * @reader: A #GAFlightRecordBatchReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The all data on success, %NULL on error. 
+ * + * Since: 6.0.0 + */ +GArrowTable * +gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, + GError **error) +{ + auto flight_reader = gaflight_record_batch_reader_get_raw(reader); + std::shared_ptr<arrow::Table> arrow_table; + auto status = flight_reader->ReadAll(&arrow_table); + if (garrow::check(error, status, "[flight-record-batch-reader][read-all]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + + +G_END_DECLS + + +GAFlightCriteria * +gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria) +{ + auto criteria = g_object_new(GAFLIGHT_TYPE_CRITERIA, NULL); + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria); + priv->criteria = *flight_criteria; + priv->expression = g_bytes_new(priv->criteria.expression.data(), + priv->criteria.expression.size()); + return GAFLIGHT_CRITERIA(criteria); +} + +arrow::flight::Criteria * +gaflight_criteria_get_raw(GAFlightCriteria *criteria) +{ + auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria); + return &(priv->criteria); +} + +arrow::flight::Location * +gaflight_location_get_raw(GAFlightLocation *location) +{ + auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(location); + return &(priv->location); +} + +GAFlightDescriptor * +gaflight_descriptor_new_raw( + const arrow::flight::FlightDescriptor *flight_descriptor) +{ + GType gtype = GAFLIGHT_TYPE_DESCRIPTOR; + switch (flight_descriptor->type) { + case arrow::flight::FlightDescriptor::DescriptorType::PATH: + gtype = GAFLIGHT_TYPE_PATH_DESCRIPTOR; + break; + case arrow::flight::FlightDescriptor::DescriptorType::CMD: + gtype = GAFLIGHT_TYPE_COMMAND_DESCRIPTOR; + break; + default: + break; + } + return GAFLIGHT_DESCRIPTOR(g_object_new(gtype, + "descriptor", flight_descriptor, + NULL)); +} + +arrow::flight::FlightDescriptor * +gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor) +{ + auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(descriptor); + return &(priv->descriptor); +} + +GAFlightTicket * 
+gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket) +{ + auto ticket = g_object_new(GAFLIGHT_TYPE_TICKET, NULL); + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket); + priv->ticket = *flight_ticket; + priv->data = g_bytes_new(priv->ticket.ticket.data(), + priv->ticket.ticket.size()); + return GAFLIGHT_TICKET(ticket); +} + +arrow::flight::Ticket * +gaflight_ticket_get_raw(GAFlightTicket *ticket) +{ + auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket); + return &(priv->ticket); +} + +/* + * Creates a #GAFlightEndpoint. + * + * When @ticket is given, a reference to it is taken and its raw ticket + * is copied into the endpoint. Otherwise a new #GAFlightTicket is built + * from @flight_endpoint. The locations are copied from @flight_endpoint + * when it is not NULL. At least one of the two arguments must be + * non-NULL; NULL is returned otherwise. + */ +GAFlightEndpoint * +gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, + GAFlightTicket *ticket) +{ + /* Without @ticket the ticket data can only come from @flight_endpoint, + * so reject the call here instead of dereferencing NULL below. */ + g_return_val_if_fail(ticket || flight_endpoint, NULL); + + auto endpoint = GAFLIGHT_ENDPOINT(g_object_new(GAFLIGHT_TYPE_ENDPOINT, + NULL)); + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint); + if (ticket) { + priv->ticket = ticket; + g_object_ref(priv->ticket); + priv->endpoint.ticket = *gaflight_ticket_get_raw(priv->ticket); + } else { + auto data = g_bytes_new(flight_endpoint->ticket.ticket.data(), + flight_endpoint->ticket.ticket.length()); + /* Named differently from the @ticket parameter to avoid shadowing it. 
*/ + auto new_ticket = gaflight_ticket_new(data); + g_bytes_unref(data); + priv->ticket = new_ticket; + priv->endpoint.ticket.ticket = flight_endpoint->ticket.ticket; + } + if (flight_endpoint) { + priv->endpoint.locations = flight_endpoint->locations; + } + return endpoint; +} + +arrow::flight::FlightEndpoint * +gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint) +{ + auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint); + return &(priv->endpoint); +} + +GAFlightInfo * +gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info) +{ + return GAFLIGHT_INFO(g_object_new(GAFLIGHT_TYPE_INFO, + "info", flight_info, + NULL)); +} + +arrow::flight::FlightInfo * +gaflight_info_get_raw(GAFlightInfo *info) +{ + auto priv = GAFLIGHT_INFO_GET_PRIVATE(info); + return &(priv->info); +} + +GAFlightStreamChunk * +gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk) +{ + return GAFLIGHT_STREAM_CHUNK( + g_object_new(GAFLIGHT_TYPE_STREAM_CHUNK, + "chunk", 
flight_chunk, + NULL)); +} + +arrow::flight::FlightStreamChunk * +gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk) +{ + auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(chunk); + return &(priv->chunk); +} + +arrow::flight::MetadataRecordBatchReader * +gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader) +{ + auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader); + return priv->reader; +} diff --git a/src/arrow/c_glib/arrow-flight-glib/common.h b/src/arrow/c_glib/arrow-flight-glib/common.h new file mode 100644 index 000000000..368fb665b --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/common.h @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria, + gaflight_criteria, + GAFLIGHT, + CRITERIA, + GObject) +struct _GAFlightCriteriaClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCriteria * +gaflight_criteria_new(GBytes *expression); + + +#define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightLocation, + gaflight_location, + GAFLIGHT, + LOCATION, + GObject) +struct _GAFlightLocationClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightLocation * +gaflight_location_new(const gchar *uri, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_location_to_string(GAFlightLocation *location); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_location_get_scheme(GAFlightLocation *location); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_location_equal(GAFlightLocation *location, + GAFlightLocation *other_location); + + +#define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightDescriptor, + gaflight_descriptor, + GAFLIGHT, + DESCRIPTOR, + GObject) +struct _GAFlightDescriptorClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_descriptor_to_string(GAFlightDescriptor *descriptor); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_descriptor_equal(GAFlightDescriptor *descriptor, + GAFlightDescriptor *other_descriptor); + + +#define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor, + gaflight_path_descriptor, + GAFLIGHT, + PATH_DESCRIPTOR, + GAFlightDescriptor) +struct _GAFlightPathDescriptorClass +{ + GAFlightDescriptorClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightPathDescriptor * +gaflight_path_descriptor_new(const gchar **paths, + gsize n_paths); + +GARROW_AVAILABLE_IN_5_0 +gchar ** 
+gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor); + + +#define GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor, + gaflight_command_descriptor, + GAFLIGHT, + COMMAND_DESCRIPTOR, + GAFlightDescriptor) +struct _GAFlightCommandDescriptorClass +{ + GAFlightDescriptorClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightCommandDescriptor * +gaflight_command_descriptor_new(const gchar *command); + +GARROW_AVAILABLE_IN_5_0 +gchar * +gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor); + + +#define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightTicket, + gaflight_ticket, + GAFLIGHT, + TICKET, + GObject) +struct _GAFlightTicketClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightTicket * +gaflight_ticket_new(GBytes *data); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_ticket_equal(GAFlightTicket *ticket, + GAFlightTicket *other_ticket); + + +#define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint, + gaflight_endpoint, + GAFLIGHT, + ENDPOINT, + GObject) +struct _GAFlightEndpointClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightEndpoint * +gaflight_endpoint_new(GAFlightTicket *ticket, + GList *locations); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_endpoint_equal(GAFlightEndpoint *endpoint, + GAFlightEndpoint *other_endpoint); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint); + + +#define GAFLIGHT_TYPE_INFO (gaflight_info_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightInfo, + gaflight_info, + GAFLIGHT, + INFO, + GObject) +struct _GAFlightInfoClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightInfo * +gaflight_info_new(GArrowSchema *schema, + GAFlightDescriptor *descriptor, + GList *endpoints, + gint64 total_records, + gint64 
total_bytes, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_info_equal(GAFlightInfo *info, + GAFlightInfo *other_info); + +GARROW_AVAILABLE_IN_5_0 +GArrowSchema * +gaflight_info_get_schema(GAFlightInfo *info, + GArrowReadOptions *options, + GError **error); +GARROW_AVAILABLE_IN_5_0 +GAFlightDescriptor * +gaflight_info_get_descriptor(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_info_get_endpoints(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +gint64 +gaflight_info_get_total_records(GAFlightInfo *info); +GARROW_AVAILABLE_IN_5_0 +gint64 +gaflight_info_get_total_bytes(GAFlightInfo *info); + + +#define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamChunk, + gaflight_stream_chunk, + GAFLIGHT, + STREAM_CHUNK, + GObject) +struct _GAFlightStreamChunkClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatch * +gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk); +GARROW_AVAILABLE_IN_6_0 +GArrowBuffer * +gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk); + + +#define GAFLIGHT_TYPE_RECORD_BATCH_READER \ + (gaflight_record_batch_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader, + gaflight_record_batch_reader, + GAFLIGHT, + RECORD_BATCH_READER, + GObject) +struct _GAFlightRecordBatchReaderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GAFlightStreamChunk * +gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GArrowTable * +gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, + GError **error); + + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-flight-glib/common.hpp b/src/arrow/c_glib/arrow-flight-glib/common.hpp new file mode 100644 index 000000000..d23f7c886 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/common.hpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/flight/api.h> + +#include <arrow-flight-glib/common.h> + + +GAFlightCriteria * +gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria); +arrow::flight::Criteria * +gaflight_criteria_get_raw(GAFlightCriteria *criteria); + +arrow::flight::Location * +gaflight_location_get_raw(GAFlightLocation *location); + +GAFlightDescriptor * +gaflight_descriptor_new_raw( + const arrow::flight::FlightDescriptor *flight_descriptor); +arrow::flight::FlightDescriptor * +gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor); + +GAFlightTicket * +gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket); +arrow::flight::Ticket * +gaflight_ticket_get_raw(GAFlightTicket *ticket); + +GAFlightEndpoint * +gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, + GAFlightTicket *ticket); +arrow::flight::FlightEndpoint * +gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint); + +GAFlightInfo * +gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info); +arrow::flight::FlightInfo * +gaflight_info_get_raw(GAFlightInfo *info); + +GAFlightStreamChunk * +gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk); 
+arrow::flight::FlightStreamChunk * +gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); + +arrow::flight::MetadataRecordBatchReader * +gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); diff --git a/src/arrow/c_glib/arrow-flight-glib/meson.build b/src/arrow/c_glib/arrow-flight-glib/meson.build new file mode 100644 index 000000000..c17415fee --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/meson.build @@ -0,0 +1,82 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +sources = files( + 'client.cpp', + 'common.cpp', + 'server.cpp', +) + +c_headers = files( + 'arrow-flight-glib.h', + 'client.h', + 'common.h', + 'server.h', +) + +cpp_headers = files( + 'arrow-flight-glib.hpp', + 'client.hpp', + 'common.hpp', + 'server.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: 'arrow-flight-glib') + +dependencies = [ + arrow_flight, + arrow_glib, +] +libarrow_flight_glib = library('arrow-flight-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(libarrow_flight_glib, + filebase: 'arrow-flight-glib', + name: 'Apache Arrow Flight GLib', + description: 'C API for Apache Arrow Flight based on GLib', + version: version, + requires: ['arrow-glib', 'arrow-flight']) + +if have_gi + gnome.generate_gir(libarrow_flight_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + sources: sources + c_headers, + namespace: 'ArrowFlight', + nsversion: api_version, + identifier_prefix: 'GAFlight', + symbol_prefix: 'gaflight', + export_packages: 'arrow-flight-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ]) +endif diff --git a/src/arrow/c_glib/arrow-flight-glib/server.cpp b/src/arrow/c_glib/arrow-flight-glib/server.cpp new file mode 100644 index 000000000..eb05284c1 --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/server.cpp @@ -0,0 +1,724 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow/util/make_unique.h> + +#include <arrow-glib/arrow-glib.hpp> + +#include <arrow-flight-glib/common.hpp> +#include <arrow-flight-glib/server.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: server + * @section_id: server + * @title: Server related classes + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * #GAFlightDataStream is a class for producing a sequence of IPC + * payloads to be sent in `FlightData` protobuf messages. Generally, + * this is not used directly; #GAFlightRecordBatchStream is used + * instead. + * + * #GAFlightRecordBatchStream is a class for producing a sequence of + * IPC payloads to be sent in `FlightData` protobuf messages from a + * #GArrowRecordBatchReader. + * + * #GAFlightServerOptions is a class for options of each server. + * + * #GAFlightServerCallContext is a class for context of each server call. + * + * #GAFlightServer is a class to develop an Apache Arrow Flight server. 
+ * + * Since: 5.0.0 + */ + + +typedef struct GAFlightDataStreamPrivate_ { + arrow::flight::FlightDataStream *stream; +} GAFlightDataStreamPrivate; + +enum { + PROP_STREAM = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDataStream, + gaflight_data_stream, + G_TYPE_OBJECT) + +#define GAFLIGHT_DATA_STREAM_GET_PRIVATE(obj) \ + static_cast<GAFlightDataStreamPrivate *>( \ + gaflight_data_stream_get_instance_private( \ + GAFLIGHT_DATA_STREAM(obj))) + +/* Deletes the raw C++ stream that was handed over through the "stream" + * property, then chains up to the parent finalize. */ +static void +gaflight_data_stream_finalize(GObject *object) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object); + + delete priv->stream; + + G_OBJECT_CLASS(gaflight_data_stream_parent_class)->finalize(object); +} + +/* Stores the raw stream pointer passed at construction time. */ +static void +gaflight_data_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STREAM: + /* The pointer is kept as is and deleted in finalize. */ + priv->stream = static_cast<arrow::flight::FlightDataStream *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_data_stream_init(GAFlightDataStream *object) +{ +} + +/* Installs the finalize/set_property handlers and the write-only, + * construct-only "stream" pointer property. 
*/ +static void +gaflight_data_stream_class_init(GAFlightDataStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_data_stream_finalize; + gobject_class->set_property = gaflight_data_stream_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("stream", + "Stream", + "The raw arrow::flight::FlightDataStream *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_STREAM, spec); +} + + +typedef struct GAFlightRecordBatchStreamPrivate_ { + GArrowRecordBatchReader *reader; +} GAFlightRecordBatchStreamPrivate; + +enum { + PROP_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchStream, + gaflight_record_batch_stream, + GAFLIGHT_TYPE_DATA_STREAM) + +#define 
GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(obj) \ + static_cast<GAFlightRecordBatchStreamPrivate *>( \ + gaflight_record_batch_stream_get_instance_private( \ + GAFLIGHT_RECORD_BATCH_STREAM(obj))) + +/* Releases the reference on the reader and clears the pointer so that + * a second call finds nothing left to release. */ +static void +gaflight_record_batch_stream_dispose(GObject *object) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = NULL; + } + + G_OBJECT_CLASS(gaflight_record_batch_stream_parent_class)->dispose(object); +} + +/* Stores a new reference to the reader given as the "reader" property. */ +static void +gaflight_record_batch_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + priv->reader = GARROW_RECORD_BATCH_READER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Exposes the stored reader as the "reader" property. 
*/ +static void +gaflight_record_batch_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + g_value_set_object(value, priv->reader); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_record_batch_stream_init(GAFlightRecordBatchStream *object) +{ +} + +static void +gaflight_record_batch_stream_class_init(GAFlightRecordBatchStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_record_batch_stream_dispose; + gobject_class->set_property = gaflight_record_batch_stream_set_property; + gobject_class->get_property = gaflight_record_batch_stream_get_property; + + GParamSpec *spec; + /** + * GAFlightRecordBatchStream:reader: + * + * The reader that produces record batches. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("reader", + "Reader", + "The reader that produces record batches", + GARROW_TYPE_RECORD_BATCH_READER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_READER, spec); +} + +/** + * gaflight_record_batch_stream_new: + * @reader: A #GArrowRecordBatchReader to be read. + * @options: (nullable): A #GArrowWriteOptions for writing record batches to + * a client. If this is %NULL, the default IPC write options are used. + * + * Returns: The newly created #GAFlightRecordBatchStream. + * + * Since: 6.0.0 + */ +GAFlightRecordBatchStream * +gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, + GArrowWriteOptions *options) +{ + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + auto arrow_options_default = arrow::ipc::IpcWriteOptions::Defaults(); + arrow::ipc::IpcWriteOptions *arrow_options = NULL; + if (options) { + arrow_options = garrow_write_options_get_raw(options); + } else { + arrow_options = &arrow_options_default; + } + auto stream = arrow::internal::make_unique< + arrow::flight::RecordBatchStream>(arrow_reader, *arrow_options); + /* release() hands the raw stream pointer over to the new object through + * its "stream" property. 
*/ + return static_cast<GAFlightRecordBatchStream *>( + g_object_new(GAFLIGHT_TYPE_RECORD_BATCH_STREAM, + "stream", stream.release(), + "reader", reader, + NULL)); +} + + +typedef struct GAFlightServerOptionsPrivate_ { + arrow::flight::FlightServerOptions options; + GAFlightLocation *location; +} GAFlightServerOptionsPrivate; + +enum { + PROP_LOCATION = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerOptions, + gaflight_server_options, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GAFlightServerOptionsPrivate *>( \ + gaflight_server_options_get_instance_private( \ + GAFLIGHT_SERVER_OPTIONS(obj))) + +static void +gaflight_server_options_dispose(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + if (priv->location) { + g_object_unref(priv->location); + priv->location = 
NULL; + } + + G_OBJECT_CLASS(gaflight_server_options_parent_class)->dispose(object); +} + +/* Runs the destructor of the C++ options that set_property constructed + * in place, then chains up to the parent finalize. */ +static void +gaflight_server_options_finalize(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + priv->options.~FlightServerOptions(); + + G_OBJECT_CLASS(gaflight_server_options_parent_class)->finalize(object); +} + +/* + * Handles the construct-only "location" property: keeps a reference to + * the #GAFlightLocation and constructs the C++ options in place from it. + * A %NULL location still constructs the options, from a blank location, + * so that finalize always destroys a constructed object. + */ +static void +gaflight_server_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCATION: + { + priv->location = GAFLIGHT_LOCATION(g_value_dup_object(value)); + if (priv->location) { + auto flight_location = gaflight_location_get_raw(priv->location); + new(&(priv->options)) arrow::flight::FlightServerOptions(*flight_location); + } else { + /* No location was given: do not look up the private data of a + * NULL object, fall back to a blank location instead. */ + arrow::flight::Location blank_location; + new(&(priv->options)) arrow::flight::FlightServerOptions(blank_location); + } + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Exposes the stored location as the "location" property. 
*/ +static void +gaflight_server_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCATION: + g_value_set_object(value, priv->location); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_server_options_init(GAFlightServerOptions *object) +{ +} + +static void +gaflight_server_options_class_init(GAFlightServerOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_server_options_dispose; + gobject_class->finalize = gaflight_server_options_finalize; + gobject_class->set_property = gaflight_server_options_set_property; + gobject_class->get_property = gaflight_server_options_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("location", + "Location", + "The location to be listened", + GAFLIGHT_TYPE_LOCATION, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + 
g_object_class_install_property(gobject_class, PROP_LOCATION, spec); +} + +/** + * gaflight_server_options_new: + * @location: A #GAFlightLocation to listen on. + * + * Returns: The newly created options for a server. + * + * Since: 5.0.0 + */ +GAFlightServerOptions * +gaflight_server_options_new(GAFlightLocation *location) +{ + return static_cast<GAFlightServerOptions *>( + g_object_new(GAFLIGHT_TYPE_SERVER_OPTIONS, + "location", location, + NULL)); +} + + +typedef struct GAFlightServerCallContextPrivate_ { + arrow::flight::ServerCallContext *call_context; +} GAFlightServerCallContextPrivate; + +enum { + PROP_CALL_CONTEXT = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerCallContext, + gaflight_server_call_context, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(obj) \ + static_cast<GAFlightServerCallContextPrivate *>( \ + gaflight_server_call_context_get_instance_private( \ + GAFLIGHT_SERVER_CALL_CONTEXT(obj))) + +/* Stores the raw call context pointer passed at construction time. */ +static void +gaflight_server_call_context_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CALL_CONTEXT: + /* Only the pointer is kept; this class installs no finalize or + * dispose handler, so the context is never freed from here. 
*/ + priv->call_context = + static_cast<arrow::flight::ServerCallContext *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_server_call_context_init(GAFlightServerCallContext *object) +{ +} + +static void +gaflight_server_call_context_class_init(GAFlightServerCallContextClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = gaflight_server_call_context_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("call-context", + "Call context", + "The raw arrow::flight::ServerCallContext", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CALL_CONTEXT, spec); 
+} + + +G_END_DECLS +namespace gaflight { + /* + * Adapts a GAFlightDataStream to the arrow::flight::FlightDataStream + * interface: every call is forwarded to the raw stream held by the + * GObject. The destructor releases one reference on the GObject. + */ + class DataStream : public arrow::flight::FlightDataStream { + public: + DataStream(GAFlightDataStream *gastream) : + arrow::flight::FlightDataStream(), + gastream_(gastream) { + } + + ~DataStream() override { + g_object_unref(gastream_); + } + + std::shared_ptr<arrow::Schema> schema() override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->schema(); + } + + arrow::Status GetSchemaPayload( + arrow::flight::FlightPayload *payload) override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->GetSchemaPayload(payload); + } + + arrow::Status Next(arrow::flight::FlightPayload *payload) override { + auto stream = gaflight_data_stream_get_raw(gastream_); + return stream->Next(payload); + } + + private: + GAFlightDataStream *gastream_; + }; + + /* + * Bridges the C++ Flight server callbacks to the GAFlightServer + * functions of the same purpose (gaflight_server_list_flights(), + * gaflight_server_do_get()). + */ + class Server : public arrow::flight::FlightServerBase { + public: + Server(GAFlightServer *gaserver) : gaserver_(gaserver) { + } + + arrow::Status + ListFlights( + const arrow::flight::ServerCallContext &context, + const arrow::flight::Criteria *criteria, + std::unique_ptr<arrow::flight::FlightListing> *listing) override { + auto gacontext = gaflight_server_call_context_new_raw(&context); + GAFlightCriteria *gacriteria = NULL; + if (criteria) { + gacriteria = gaflight_criteria_new_raw(criteria); + } + GError *gerror = NULL; + auto gaflights = gaflight_server_list_flights(gaserver_, + gacontext, + gacriteria, + &gerror); + /* The temporary wrappers are only needed for the call above. 
*/ + if (gacriteria) { + g_object_unref(gacriteria); + } + g_object_unref(gacontext); + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][list-flights]"); + } + std::vector<arrow::flight::FlightInfo> flights; + /* Copy each FlightInfo out of its wrapper, then release the wrapper + * and finally the list itself. */ + for (auto node = gaflights; node; node = node->next) { + auto gaflight = GAFLIGHT_INFO(node->data); + flights.push_back(*gaflight_info_get_raw(gaflight)); + g_object_unref(gaflight); + } + g_list_free(gaflights); + *listing = arrow::internal::make_unique< + 
arrow::flight::SimpleFlightListing>(flights); + return arrow::Status::OK(); + } + + arrow::Status DoGet( + const arrow::flight::ServerCallContext &context, + const arrow::flight::Ticket &ticket, + std::unique_ptr<arrow::flight::FlightDataStream> *stream) override { + auto gacontext = gaflight_server_call_context_new_raw(&context); + auto gaticket = gaflight_ticket_new_raw(&ticket); + GError *gerror = NULL; + auto gastream = gaflight_server_do_get(gaserver_, + gacontext, + gaticket, + &gerror); + g_object_unref(gaticket); + g_object_unref(gacontext); + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][do-get]"); + } + *stream = arrow::internal::make_unique<DataStream>(gastream); + return arrow::Status::OK(); + } + + private: + GAFlightServer *gaserver_; + }; +}; +G_BEGIN_DECLS + +typedef struct GAFlightServerPrivate_ { + gaflight::Server server; +} GAFlightServerPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightServer, + gaflight_server, + G_TYPE_OBJECT) + +#define GAFLIGHT_SERVER_GET_PRIVATE(obj) \ + static_cast<GAFlightServerPrivate *>( \ + gaflight_server_get_instance_private( \ + GAFLIGHT_SERVER(obj))) + +static void +gaflight_server_finalize(GObject *object) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object); + + priv->server.~Server(); + + G_OBJECT_CLASS(gaflight_server_parent_class)->finalize(object); +} + +static void +gaflight_server_init(GAFlightServer *object) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object); + new(&(priv->server)) gaflight::Server(object); +} + +static void +gaflight_server_class_init(GAFlightServerClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_server_finalize; +} + +/** + * gaflight_server_listen: + * @server: A #GAFlightServer. + * @options: A #GAFlightServerOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 5.0.0 + */ +gboolean +gaflight_server_listen(GAFlightServer *server, + GAFlightServerOptions *options, + GError **error) +{ + auto flight_server = gaflight_server_get_raw(server); + const auto flight_options = gaflight_server_options_get_raw(options); + return garrow::check(error, + flight_server->Init(*flight_options), + "[flight-server][listen]"); +} + +/** + * gaflight_server_get_port: + * @server: A #GAFlightServer. + * + * Returns: The port number that the server is listening on. + * + * Since: 5.0.0 + */ +gint +gaflight_server_get_port(GAFlightServer *server) +{ + const auto flight_server = gaflight_server_get_raw(server); + return flight_server->port(); +} + +/** + * gaflight_server_shutdown: + * @server: A #GAFlightServer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Shuts down the server. This function can be called from a signal + * handler or another thread. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 5.0.0 + */ +gboolean +gaflight_server_shutdown(GAFlightServer *server, + GError **error) +{ + auto flight_server = gaflight_server_get_raw(server); + return garrow::check(error, + flight_server->Shutdown(), + "[flight-server][shutdown]"); +} + +/** + * gaflight_server_list_flights: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @criteria: (nullable): A #GAFlightCriteria. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (element-type GAFlightInfo) (transfer full): + * #GList of #GAFlightInfo on success, %NULL on error. 
+ * + * Since: 5.0.0 + */ +GList * +gaflight_server_list_flights(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + /* The class may leave list_flights unset: report that as a + * "not implemented" error instead of calling a NULL pointer. */ + if (!(klass && klass->list_flights)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "not implemented"); + return NULL; + } + return (*(klass->list_flights))(server, context, criteria, error); +} + +/** + * gaflight_server_do_get: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @ticket: A #GAFlightTicket. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): #GAFlightDataStream on success, %NULL on error. + * + * Since: 6.0.0 + */ +GAFlightDataStream * +gaflight_server_do_get(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + /* The class may leave do_get unset: report that as a + * "not implemented" error instead of calling a NULL pointer. */ + if (!(klass && klass->do_get)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "not implemented"); + return NULL; + } + return (*(klass->do_get))(server, context, ticket, error); +} + + +G_END_DECLS + + +/* Returns the raw C++ stream pointer held by the wrapper. */ +arrow::flight::FlightDataStream * +gaflight_data_stream_get_raw(GAFlightDataStream *stream) +{ + auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(stream); + return priv->stream; +} + +/* Returns a pointer to the C++ options stored inside the wrapper. 
*/ +arrow::flight::FlightServerOptions * +gaflight_server_options_get_raw(GAFlightServerOptions *options) +{ + auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +/* Wraps @call_context; only the pointer is passed on to the new object. */ +GAFlightServerCallContext * +gaflight_server_call_context_new_raw( + const arrow::flight::ServerCallContext *call_context) +{ + return GAFLIGHT_SERVER_CALL_CONTEXT( + g_object_new(GAFLIGHT_TYPE_SERVER_CALL_CONTEXT, + "call-context", call_context, + NULL)); +} + +arrow::flight::FlightServerBase * +gaflight_server_get_raw(GAFlightServer *server) +{ + auto priv = GAFLIGHT_SERVER_GET_PRIVATE(server); + return 
&(priv->server); +} diff --git a/src/arrow/c_glib/arrow-flight-glib/server.h b/src/arrow/c_glib/arrow-flight-glib/server.h new file mode 100644 index 000000000..107fe44bf --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/server.h @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-flight-glib/common.h> + +G_BEGIN_DECLS + + +#define GAFLIGHT_TYPE_DATA_STREAM \ + (gaflight_data_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightDataStream, + gaflight_data_stream, + GAFLIGHT, + DATA_STREAM, + GObject) +struct _GAFlightDataStreamClass +{ + GObjectClass parent_class; +}; + + +#define GAFLIGHT_TYPE_RECORD_BATCH_STREAM \ + (gaflight_record_batch_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream, + gaflight_record_batch_stream, + GAFLIGHT, + RECORD_BATCH_STREAM, + GAFlightDataStream) +struct _GAFlightRecordBatchStreamClass +{ + GAFlightDataStreamClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GAFlightRecordBatchStream * +gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, + GArrowWriteOptions *options); + + +#define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServerOptions, + gaflight_server_options, + GAFLIGHT, + SERVER_OPTIONS, + GObject) +struct _GAFlightServerOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GAFlightServerOptions * +gaflight_server_options_new(GAFlightLocation *location); + + +#define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT \ + (gaflight_server_call_context_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, + gaflight_server_call_context, + GAFLIGHT, + SERVER_CALL_CONTEXT, + GObject) +struct _GAFlightServerCallContextClass +{ + GObjectClass parent_class; +}; + + +#define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type()) +G_DECLARE_DERIVABLE_TYPE(GAFlightServer, + gaflight_server, + GAFLIGHT, + SERVER, + GObject) +/** + * GAFlightServerClass: + * @list_flights: A virtual function to implement `ListFlights` API. + * @do_get: A virtual function to implement `DoGet` API. 
+ * + * Since: 5.0.0 + */ +struct _GAFlightServerClass +{ + GObjectClass parent_class; + + GList *(*list_flights)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error); + GAFlightDataStream *(*do_get)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error); +}; + +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_listen(GAFlightServer *server, + GAFlightServerOptions *options, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gint +gaflight_server_get_port(GAFlightServer *server); +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_shutdown(GAFlightServer *server, + GError **error); +GARROW_AVAILABLE_IN_5_0 +gboolean +gaflight_server_wait(GAFlightServer *server, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GList * +gaflight_server_list_flights(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightCriteria *criteria, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GAFlightDataStream * +gaflight_server_do_get(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightTicket *ticket, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-flight-glib/server.hpp b/src/arrow/c_glib/arrow-flight-glib/server.hpp new file mode 100644 index 000000000..f7f2a7aba --- /dev/null +++ b/src/arrow/c_glib/arrow-flight-glib/server.hpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/flight/api.h> + +#include <arrow-flight-glib/server.h> + + +arrow::flight::FlightDataStream * +gaflight_data_stream_get_raw(GAFlightDataStream *stream); + +arrow::flight::FlightServerOptions * +gaflight_server_options_get_raw(GAFlightServerOptions *options); + +GAFlightServerCallContext * +gaflight_server_call_context_new_raw( + const arrow::flight::ServerCallContext *flight_context); + +arrow::flight::FlightServerBase * +gaflight_server_get_raw(GAFlightServer *server); diff --git a/src/arrow/c_glib/arrow-glib/array-builder.cpp b/src/arrow/c_glib/arrow-glib/array-builder.cpp new file mode 100644 index 000000000..85aba3f05 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/array-builder.cpp @@ -0,0 +1,6192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array-builder.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/decimal.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/type.hpp> + +template <typename BUILDER, typename VALUE> +gboolean +garrow_array_builder_append_value(GArrowArrayBuilder *builder, + VALUE value, + GError **error, + const gchar *context) +{ + auto arrow_builder = + static_cast<BUILDER>(garrow_array_builder_get_raw(builder)); + auto status = arrow_builder->Append(value); + return garrow_error_check(error, status, context); +} + +template <typename VALUE, typename APPEND_FUNCTION> +gboolean +garrow_array_builder_append_values(VALUE *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error, + const gchar *context, + APPEND_FUNCTION append_function) +{ + if (is_valids_length > 0) { + if (values_length != is_valids_length) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: values length and is_valids length must be equal: " + "<%" G_GINT64_FORMAT "> != " + "<%" G_GINT64_FORMAT ">", + context, + values_length, + is_valids_length); + return FALSE; + } + + const gint64 chunk_size = 4096; + gint64 n_chunks = is_valids_length / chunk_size; + gint64 n_remains = is_valids_length % chunk_size; + gint64 n_loops = n_chunks; + if (n_remains > 0) { + ++n_loops; + } + for (gint64 i = 0; i < n_loops; ++i) { + uint8_t valid_bytes[chunk_size]; + gint64 offset = chunk_size * i; + const gboolean *chunked_is_valids = is_valids + offset; + gint64 n_values; + if (i == n_chunks) { + n_values = n_remains; + } else { + n_values = chunk_size; + } + for (gint64 j = 0; j < n_values; ++j) { + valid_bytes[j] = chunked_is_valids[j]; + } + auto status = append_function(values + offset, + n_values, + valid_bytes); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + } + return TRUE; + } else { + auto status = append_function(values, values_length, nullptr); + return garrow_error_check(error, 
status, context); + } +} + +template <typename BUILDER, typename VALUE> +gboolean +garrow_array_builder_append_values(GArrowArrayBuilder *builder, + VALUE *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error, + const gchar *context) +{ + auto arrow_builder = + static_cast<BUILDER>(garrow_array_builder_get_raw(builder)); + return garrow_array_builder_append_values( + values, + values_length, + is_valids, + is_valids_length, + error, + context, + [&arrow_builder](VALUE *values, + gint64 values_length, + const uint8_t *valid_bytes) -> arrow::Status { + return arrow_builder->AppendValues(values, values_length, valid_bytes); + }); +} + +template <typename BUILDER> +gboolean +garrow_array_builder_append_values(GArrowArrayBuilder *builder, + GBytes **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error, + const gchar *context) +{ + auto arrow_builder = + static_cast<BUILDER>(garrow_array_builder_get_raw(builder)); + arrow::Status status; + if (is_valids_length > 0 && values_length != is_valids_length) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: values length and is_valids length must be equal: " + "<%" G_GINT64_FORMAT "> != " + "<%" G_GINT64_FORMAT ">", + context, + values_length, + is_valids_length); + return FALSE; + } + + const gint64 chunk_size = 4096; + gint64 n_chunks = values_length / chunk_size; + gint64 n_remains = values_length % chunk_size; + gint64 n_loops = n_chunks; + if (n_remains > 0) { + ++n_loops; + } + for (gint64 i = 0; i < n_loops; ++i) { + std::vector<std::string> strings; + uint8_t *valid_bytes = nullptr; + uint8_t valid_bytes_buffer[chunk_size]; + if (is_valids_length > 0) { + valid_bytes = valid_bytes_buffer; + } + gboolean is_valids_buffer[chunk_size]; + const gint64 offset = chunk_size * i; + gint64 n_values; + if (i == n_chunks) { + n_values = n_remains; + } else { + n_values = chunk_size; + } + for (gint64 j = 0; j 
< n_values; ++j) { + auto value = values[offset + j]; + if (value) { + size_t data_size; + auto raw_data = g_bytes_get_data(value, &data_size); + strings.push_back(std::string(static_cast<const char *>(raw_data), + data_size)); + if (valid_bytes) { + valid_bytes_buffer[j] = is_valids[offset + j]; + } + } else { + strings.push_back(""); + valid_bytes_buffer[j] = 0; + if (!valid_bytes) { + if (j > 0) { + memset(valid_bytes_buffer, 1, sizeof(uint8_t) * j - 1); + } + valid_bytes = valid_bytes_buffer; + memset(is_valids_buffer, TRUE, sizeof(gboolean) * chunk_size); + is_valids = is_valids_buffer; + } + } + } + status = arrow_builder->AppendValues(strings, valid_bytes); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + } + return TRUE; +} + +template <typename VALUE, typename GET_VALUE_FUNCTION> +gboolean +garrow_array_builder_append_values( + GArrowArrayBuilder *builder, + VALUE *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error, + const gchar *context, + GET_VALUE_FUNCTION get_value_function) +{ + auto arrow_builder = + static_cast<arrow::FixedSizeBinaryBuilder *>( + garrow_array_builder_get_raw(builder)); + if (is_valids_length > 0 && values_length != is_valids_length) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: values length and is_valids length must be equal: " + "<%" G_GINT64_FORMAT "> != " + "<%" G_GINT64_FORMAT ">", + context, + values_length, + is_valids_length); + return FALSE; + } + + auto value_size = arrow_builder->byte_width(); + const gint64 chunk_size = 4096; + gint64 n_chunks = values_length / chunk_size; + gint64 n_remains = values_length % chunk_size; + gint64 n_loops = n_chunks; + if (n_remains > 0) { + ++n_loops; + } + for (gint64 i = 0; i < n_loops; ++i) { + uint8_t data[value_size * chunk_size]; + uint8_t *valid_bytes = nullptr; + uint8_t valid_bytes_buffer[chunk_size]; + if (is_valids_length > 0) { + valid_bytes = valid_bytes_buffer; + } + 
const gint64 offset = chunk_size * i; + gint64 n_values; + if (i == n_chunks) { + n_values = n_remains; + } else { + n_values = chunk_size; + } + for (gint64 j = 0; j < n_values; ++j) { + bool is_valid = true; + if (is_valids) { + is_valid = is_valids[offset + j]; + } + VALUE value = nullptr; + if (is_valid) { + value = values[offset + j]; + } + if (value) { + get_value_function(data + (value_size * j), + value, + value_size); + } else { + is_valid = false; + if (!valid_bytes) { + valid_bytes = valid_bytes_buffer; + memset(valid_bytes_buffer, true, j); + } + } + if (valid_bytes) { + valid_bytes_buffer[j] = is_valid; + } + } + auto status = arrow_builder->AppendValues(data, n_values, valid_bytes); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + } + return TRUE; +} + +template <typename BUILDER> +gboolean +garrow_array_builder_append_values(GArrowArrayBuilder *builder, + GBytes *values, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error, + const gchar *context) +{ + auto arrow_builder = + static_cast<BUILDER>(garrow_array_builder_get_raw(builder)); + auto value_size = arrow_builder->byte_width(); + gsize raw_values_size; + auto raw_values = + static_cast<const uint8_t *>(g_bytes_get_data(values, &raw_values_size)); + const gint64 n_values = raw_values_size / value_size; + if (is_valids_length > 0 && n_values != is_valids_length) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: the number of values and is_valids length must be equal: " + "<%" G_GINT64_FORMAT "> != " + "<%" G_GINT64_FORMAT ">", + context, + n_values, + is_valids_length); + return FALSE; + } + + if (is_valids_length == 0) { + auto status = arrow_builder->AppendValues(raw_values, n_values); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + return TRUE; + } + + const gint64 chunk_size = 4096; + gint64 n_chunks = n_values / chunk_size; + gint64 n_remains = n_values % chunk_size; + gint64 n_loops = n_chunks; + if 
(n_remains > 0) { + ++n_loops; + } + for (gint64 i = 0; i < n_loops; ++i) { + uint8_t valid_bytes[chunk_size]; + const auto offset = chunk_size * i; + gint64 n_values; + if (i == n_chunks) { + n_values = n_remains; + } else { + n_values = chunk_size; + } + for (gint64 j = 0; j < n_values; ++j) { + valid_bytes[j] = is_valids[offset + j]; + } + auto status = arrow_builder->AppendValues(raw_values + (value_size * offset), + n_values, + valid_bytes); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + } + return TRUE; +} + + +G_BEGIN_DECLS + +/** + * SECTION: array-builder + * @section_id: array-builder-classes + * @title: Array builder classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowArrayBuilder is a base class for all array builder classes + * such as #GArrowBooleanArrayBuilder. + * + * You need to use array builder class to create a new array. + * + * #GArrowNullArrayBuilder is the class to create a new + * #GArrowNullArray. + * + * #GArrowBooleanArrayBuilder is the class to create a new + * #GArrowBooleanArray. + * + * #GArrowIntArrayBuilder is the class to create a new integer + * array. Integer size is automatically chosen. It's recommend that + * you use this builder instead of specific integer size builder such + * as #GArrowInt8ArrayBuilder. + * + * #GArrowUIntArrayBuilder is the class to create a new unsigned + * integer array. Unsigned integer size is automatically chosen. It's + * recommend that you use this builder instead of specific unsigned + * integer size builder such as #GArrowUInt8ArrayBuilder. + * + * #GArrowInt8ArrayBuilder is the class to create a new + * #GArrowInt8Array. + * + * #GArrowUInt8ArrayBuilder is the class to create a new + * #GArrowUInt8Array. + * + * #GArrowInt16ArrayBuilder is the class to create a new + * #GArrowInt16Array. + * + * #GArrowUInt16ArrayBuilder is the class to create a new + * #GArrowUInt16Array. + * + * #GArrowInt32ArrayBuilder is the class to create a new + * #GArrowInt32Array. 
+ * + * #GArrowUInt32ArrayBuilder is the class to create a new + * #GArrowUInt32Array. + * + * #GArrowInt64ArrayBuilder is the class to create a new + * #GArrowInt64Array. + * + * #GArrowUInt64ArrayBuilder is the class to create a new + * #GArrowUInt64Array. + * + * #GArrowFloatArrayBuilder is the class to create a new + * #GArrowFloatArray. + * + * #GArrowDoubleArrayBuilder is the class to create a new + * #GArrowDoubleArray. + * + * #GArrowBinaryArrayBuilder is the class to create a new + * #GArrowBinaryArray. + * + * #GArrowLargeBinaryArrayBuilder is the class to create a new + * #GArrowLargeBinaryArray. + * + * #GArrowStringArrayBuilder is the class to create a new + * #GArrowStringArray. + * + * #GArrowLargeStringArrayBuilder is the class to create a new + * #GArrowLargeStringArray. + * + * #GArrowFixedSizeBinaryArrayBuilder is the class to create a new + * #GArrowFixedSizeBinaryArray. + * + * #GArrowDate32ArrayBuilder is the class to create a new + * #GArrowDate32Array. + * + * #GArrowDate64ArrayBuilder is the class to create a new + * #GArrowDate64Array. + * + * #GArrowTimestampArrayBuilder is the class to create a new + * #GArrowTimestampArray. + * + * #GArrowTime32ArrayBuilder is the class to create a new + * #GArrowTime32Array. + * + * #GArrowTime64ArrayBuilder is the class to create a new + * #GArrowTime64Array. + * + * #GArrowStringDictionaryArrayBuilder is the class to create a new + * #GArrowDictionaryArray with a dictionary array of #GArrowStringArray. + * + * #GArrowListArrayBuilder is the class to create a new + * #GArrowListArray. + * + * #GArrowLargeListArrayBuilder is the class to create a new + * #GArrowLargeListArray. + * + * #GArrowStructArrayBuilder is the class to create a new + * #GArrowStructArray. + * + * #GArrowMapArrayBuilder is the class to create a new + * #GArrowMapArray. + * + * #GArrowDecimal128ArrayBuilder is the class to create a new + * #GArrowDecimal128Array. 
+ * + * #GArrowDecimal256ArrayBuilder is the class to create a new + * #GArrowDecimal256Array. + */ + +typedef struct GArrowArrayBuilderPrivate_ { + arrow::ArrayBuilder *array_builder; + gboolean have_ownership; +} GArrowArrayBuilderPrivate; + +enum { + PROP_0, + PROP_ARRAY_BUILDER +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowArrayBuilder, + garrow_array_builder, + G_TYPE_OBJECT) + +#define GARROW_ARRAY_BUILDER_GET_PRIVATE(obj) \ + static_cast<GArrowArrayBuilderPrivate *>( \ + garrow_array_builder_get_instance_private( \ + GARROW_ARRAY_BUILDER(obj))) + +static void +garrow_array_builder_finalize(GObject *object) +{ + auto priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(object); + + if (priv->have_ownership) { + delete priv->array_builder; + } + + G_OBJECT_CLASS(garrow_array_builder_parent_class)->finalize(object); +} + +static void +garrow_array_builder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ARRAY_BUILDER: + priv->array_builder = + static_cast<arrow::ArrayBuilder *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_builder_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_builder_init(GArrowArrayBuilder *builder) +{ + auto priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(builder); + priv->have_ownership = TRUE; +} + +static void +garrow_array_builder_class_init(GArrowArrayBuilderClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_array_builder_finalize; + gobject_class->set_property = garrow_array_builder_set_property; + gobject_class->get_property = 
garrow_array_builder_get_property; + + spec = g_param_spec_pointer("array-builder", + "Array builder", + "The raw arrow::ArrayBuilder *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ARRAY_BUILDER, spec); +} + +static GArrowArrayBuilder * +garrow_array_builder_new(const std::shared_ptr<arrow::DataType> &type, + GError **error, + const char *context) +{ + auto memory_pool = arrow::default_memory_pool(); + std::unique_ptr<arrow::ArrayBuilder> arrow_builder; + auto status = arrow::MakeBuilder(memory_pool, type, &arrow_builder); + if (!garrow_error_check(error, status, context)) { + return NULL; + } + return garrow_array_builder_new_raw(arrow_builder.release()); +} + +/** + * garrow_array_builder_release_ownership: (skip) + * @builder: A #GArrowArrayBuilder. + * + * Release ownership of `arrow::ArrayBuilder` in `builder`. + * + * Since: 0.8.0 + */ +void +garrow_array_builder_release_ownership(GArrowArrayBuilder *builder) +{ + auto priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(builder); + priv->have_ownership = FALSE; +} + +/** + * garrow_array_builder_get_value_data_type: + * @builder: A #GArrowArrayBuilder. + * + * Returns: (transfer full): The #GArrowDataType of the value of + * the array builder. + * + * Since: 0.9.0 + */ +GArrowDataType * +garrow_array_builder_get_value_data_type(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto arrow_type = arrow_builder->type(); + return garrow_data_type_new_raw(&arrow_type); +} + +/** + * garrow_array_builder_get_value_type: + * @builder: A #GArrowArrayBuilder. + * + * Returns: The #GArrowType of the value of the array builder. 
+ * + * Since: 0.9.0 + */ +GArrowType +garrow_array_builder_get_value_type(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto arrow_type = arrow_builder->type(); + return garrow_type_from_raw(arrow_type->id()); +} + +/** + * garrow_array_builder_finish: + * @builder: A #GArrowArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The built #GArrowArray on success, + * %NULL on error. + */ +GArrowArray * +garrow_array_builder_finish(GArrowArrayBuilder *builder, GError **error) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + std::shared_ptr<arrow::Array> arrow_array; + auto status = arrow_builder->Finish(&arrow_array); + if (garrow_error_check(error, status, "[array-builder][finish]")) { + return garrow_array_new_raw(&arrow_array); + } else { + return NULL; + } +} + +/** + * garrow_array_builder_reset: + * @builder: A #GArrowArrayBuilder. + * + * Since: 2.0.0 + */ +void +garrow_array_builder_reset(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + arrow_builder->Reset(); +} + +/** + * garrow_array_builder_get_capacity: + * @builder: A #GArrowArrayBuilder. + * + * Returns: The capacity of the building array. + * + * Since: 2.0.0 + */ +gint64 +garrow_array_builder_get_capacity(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + return arrow_builder->capacity(); +} + +/** + * garrow_array_builder_get_length: + * @builder: A #GArrowArrayBuilder. + * + * Returns: The current length of the building array. + * + * Since: 2.0.0 + */ +gint64 +garrow_array_builder_get_length(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + return arrow_builder->length(); +} + +/** + * garrow_array_builder_get_n_nulls: + * @builder: A #GArrowArrayBuilder. + * + * Returns: The current number of null elements in the building array. 
+ * + * Since: 2.0.0 + */ +gint64 +garrow_array_builder_get_n_nulls(GArrowArrayBuilder *builder) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + return arrow_builder->null_count(); +} + +/** + * garrow_array_builder_resize: + * @builder: A #GArrowArrayBuilder. + * @capacity: A new capacity. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_array_builder_resize(GArrowArrayBuilder *builder, + gint64 capacity, + GError **error) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->Resize(capacity); + return garrow_error_check(error, status, "[array-builder][resize]"); +} + +/** + * garrow_array_builder_reserve: + * @builder: A #GArrowArrayBuilder. + * @additional_capacity: The additional capacity to be reserved. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_array_builder_reserve(GArrowArrayBuilder *builder, + gint64 additional_capacity, + GError **error) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->Reserve(additional_capacity); + return garrow_error_check(error, status, "[array-builder][reserve]"); +} + +/** + * garrow_array_builder_append_null: + * @builder: A #GArrowArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_array_builder_append_null(GArrowArrayBuilder *builder, + GError **error) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->AppendNull(); + return garrow_error_check(error, status, "[array-builder][append-null]"); +} + +/** + * garrow_array_builder_append_nulls: + * @builder: A #GArrowArrayBuilder. 
+ * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * garrow_array_builder_append_null() calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_array_builder_append_nulls(GArrowArrayBuilder *builder, + gint64 n, + GError **error) +{ + const gchar *context = "[array-builder][append-nulls]"; + if (n < 0) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: the number of nulls must be 0 or larger: " + "<%" G_GINT64_FORMAT ">", + context, + n); + return FALSE; + } + if (n == 0) { + return TRUE; + } + + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->AppendNulls(n); + return garrow_error_check(error, status, context); +} + +/** + * garrow_array_builder_append_empty_value: + * @builder: A #GArrowArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_array_builder_append_empty_value(GArrowArrayBuilder *builder, + GError **error) +{ + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->AppendEmptyValue(); + return garrow_error_check(error, + status, + "[array-builder][append-empty-value]"); +} + +/** + * garrow_array_builder_append_empty_values: + * @builder: A #GArrowArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple empty values at once. It's more efficient than multiple + * garrow_array_builder_append_empty_value() calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_array_builder_append_empty_values(GArrowArrayBuilder *builder, + gint64 n, + GError **error) +{ + const gchar *context = "[array-builder][append-empty-values]"; + if (n < 0) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: the number of empty values must be 0 or larger: " + "<%" G_GINT64_FORMAT ">", + context, + n); + return FALSE; + } + if (n == 0) { + return TRUE; + } + + auto arrow_builder = garrow_array_builder_get_raw(builder); + auto status = arrow_builder->AppendEmptyValues(n); + return garrow_error_check(error, status, context); +} + + +G_DEFINE_TYPE(GArrowNullArrayBuilder, + garrow_null_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_null_array_builder_init(GArrowNullArrayBuilder *builder) +{ +} + +static void +garrow_null_array_builder_class_init(GArrowNullArrayBuilderClass *klass) +{ +} + +/** + * garrow_null_array_builder_new: + * + * Returns: A newly created #GArrowNullArrayBuilder. + * + * Since: 0.13.0 + */ +GArrowNullArrayBuilder * +garrow_null_array_builder_new(void) +{ + auto builder = garrow_array_builder_new(arrow::null(), + NULL, + "[null-array-builder][new]"); + return GARROW_NULL_ARRAY_BUILDER(builder); +} + +/** + * garrow_null_array_builder_append_null: (skip) + * @builder: A #GArrowNullArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.13.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. + */ +gboolean +garrow_null_array_builder_append_null(GArrowNullArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_null_array_builder_append_nulls: (skip) + * @builder: A #GArrowNullArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.13.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. + */ +gboolean +garrow_null_array_builder_append_nulls(GArrowNullArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowBooleanArrayBuilder, + garrow_boolean_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_boolean_array_builder_init(GArrowBooleanArrayBuilder *builder) +{ +} + +static void +garrow_boolean_array_builder_class_init(GArrowBooleanArrayBuilderClass *klass) +{ +} + +/** + * garrow_boolean_array_builder_new: + * + * Returns: A newly created #GArrowBooleanArrayBuilder. + */ +GArrowBooleanArrayBuilder * +garrow_boolean_array_builder_new(void) +{ + auto builder = garrow_array_builder_new(arrow::boolean(), + NULL, + "[boolean-array-builder][new]"); + return GARROW_BOOLEAN_ARRAY_BUILDER(builder); +} + +/** + * garrow_boolean_array_builder_append: + * @builder: A #GArrowBooleanArrayBuilder. + * @value: A boolean value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_boolean_array_builder_append_value() instead. + */ +gboolean +garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error) +{ + return garrow_boolean_array_builder_append_value(builder, value, error); +} + +/** + * garrow_boolean_array_builder_append_value: + * @builder: A #GArrowBooleanArrayBuilder. + * @value: A boolean value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::BooleanBuilder *> + (GARROW_ARRAY_BUILDER(builder), + static_cast<bool>(value), + error, + "[boolean-array-builder][append-value]"); +} + +/** + * garrow_boolean_array_builder_append_values: + * @builder: A #GArrowBooleanArrayBuilder. + * @values: (array length=values_length): The array of boolean. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, + const gboolean *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + guint8 arrow_values[values_length]; + for (gint64 i = 0; i < values_length; ++i) { + arrow_values[i] = values[i]; + } + return garrow_array_builder_append_values<arrow::BooleanBuilder *> + (GARROW_ARRAY_BUILDER(builder), + arrow_values, + values_length, + is_valids, + is_valids_length, + error, + "[boolean-array-builder][append-values]"); +} + +/** + * garrow_boolean_array_builder_append_null: (skip) + * @builder: A #GArrowBooleanArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */
+gboolean
+garrow_boolean_array_builder_append_null(GArrowBooleanArrayBuilder *builder,
+                                         GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_boolean_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowBooleanArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_boolean_array_builder_append_nulls(GArrowBooleanArrayBuilder *builder,
+                                          gint64 n,
+                                          GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowIntArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowIntArrayBuilder,
+              garrow_int_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_int_array_builder_init(GArrowIntArrayBuilder *builder)
+{
+}
+
+static void
+garrow_int_array_builder_class_init(GArrowIntArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_int_array_builder_new:
+ *
+ * Creates an arrow::AdaptiveIntBuilder on Arrow's default memory pool
+ * and wraps it through `garrow_array_builder_new_raw`.
+ *
+ * Returns: A newly created #GArrowIntArrayBuilder.
+ *
+ * Since: 0.6.0
+ */
+GArrowIntArrayBuilder *
+garrow_int_array_builder_new(void)
+{
+  auto memory_pool = arrow::default_memory_pool();
+  auto arrow_builder = new arrow::AdaptiveIntBuilder(memory_pool);
+  auto builder = garrow_array_builder_new_raw(arrow_builder,
+                                              GARROW_TYPE_INT_ARRAY_BUILDER);
+  return GARROW_INT_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_int_array_builder_append:
+ * @builder: A #GArrowIntArrayBuilder.
+ * @value: An int value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_int_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.6.0
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_int_array_builder_append_value() instead.
+ */
+gboolean
+garrow_int_array_builder_append(GArrowIntArrayBuilder *builder,
+                                gint64 value,
+                                GError **error)
+{
+  return garrow_int_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_int_array_builder_append_value:
+ * @builder: A #GArrowIntArrayBuilder.
+ * @value: An int value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder,
+                                      gint64 value,
+                                      GError **error)
+{
+  return garrow_array_builder_append_value<arrow::AdaptiveIntBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[int-array-builder][append-value]");
+}
+
+/**
+ * garrow_int_array_builder_append_values:
+ * @builder: A #GArrowIntArrayBuilder.
+ * @values: (array length=values_length): The array of int.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder,
+                                       const gint64 *values,
+                                       gint64 values_length,
+                                       const gboolean *is_valids,
+                                       gint64 is_valids_length,
+                                       GError **error)
+{
+  return garrow_array_builder_append_values<arrow::AdaptiveIntBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     reinterpret_cast<const int64_t *>(values), /* reinterpret the gint64 buffer as int64_t */
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[int-array-builder][append-values]");
+}
+
+/**
+ * garrow_int_array_builder_append_null: (skip)
+ * @builder: A #GArrowIntArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.6.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_int_array_builder_append_null(GArrowIntArrayBuilder *builder,
+                                     GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_int_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowIntArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder,
+                                      gint64 n,
+                                      GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowUIntArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowUIntArrayBuilder,
+              garrow_uint_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_uint_array_builder_init(GArrowUIntArrayBuilder *builder)
+{
+}
+
+static void
+garrow_uint_array_builder_class_init(GArrowUIntArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_uint_array_builder_new:
+ *
+ * Creates an arrow::AdaptiveUIntBuilder on Arrow's default memory pool
+ * and wraps it through `garrow_array_builder_new_raw`.
+ *
+ * Returns: A newly created #GArrowUIntArrayBuilder.
+ *
+ * Since: 0.8.0
+ */
+GArrowUIntArrayBuilder *
+garrow_uint_array_builder_new(void)
+{
+  auto memory_pool = arrow::default_memory_pool();
+  auto arrow_builder = new arrow::AdaptiveUIntBuilder(memory_pool);
+  auto builder = garrow_array_builder_new_raw(arrow_builder,
+                                              GARROW_TYPE_UINT_ARRAY_BUILDER);
+  return GARROW_UINT_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_uint_array_builder_append:
+ * @builder: A #GArrowUIntArrayBuilder.
+ * @value: An unsigned int value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_uint_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_uint_array_builder_append_value() instead.
+ */
+gboolean
+garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder,
+                                 guint64 value,
+                                 GError **error)
+{
+  return garrow_uint_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_uint_array_builder_append_value:
+ * @builder: A #GArrowUIntArrayBuilder.
+ * @value: An unsigned int value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder,
+                                       guint64 value,
+                                       GError **error)
+{
+  return garrow_array_builder_append_value<arrow::AdaptiveUIntBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[uint-array-builder][append-value]");
+}
+
+/**
+ * garrow_uint_array_builder_append_values:
+ * @builder: A #GArrowUIntArrayBuilder.
+ * @values: (array length=values_length): The array of unsigned int.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder,
+                                        const guint64 *values,
+                                        gint64 values_length,
+                                        const gboolean *is_valids,
+                                        gint64 is_valids_length,
+                                        GError **error)
+{
+  return garrow_array_builder_append_values<arrow::AdaptiveUIntBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     reinterpret_cast<const uint64_t *>(values), /* reinterpret the guint64 buffer as uint64_t */
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[uint-array-builder][append-values]");
+}
+
+/**
+ * garrow_uint_array_builder_append_null: (skip)
+ * @builder: A #GArrowUIntArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_uint_array_builder_append_null(GArrowUIntArrayBuilder *builder,
+                                      GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_uint_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowUIntArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder,
+                                       gint64 n,
+                                       GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowInt8ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowInt8ArrayBuilder,
+              garrow_int8_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_int8_array_builder_init(GArrowInt8ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_int8_array_builder_class_init(GArrowInt8ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_int8_array_builder_new:
+ *
+ * Returns: A newly created #GArrowInt8ArrayBuilder.
+ */
+GArrowInt8ArrayBuilder *
+garrow_int8_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::int8(),
+                                          NULL,
+                                          "[int8-array-builder][new]");
+  return GARROW_INT8_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_int8_array_builder_append:
+ * @builder: A #GArrowInt8ArrayBuilder.
+ * @value: An int8 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_int8_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_int8_array_builder_append_value() instead.
+ */
+gboolean
+garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder,
+                                 gint8 value,
+                                 GError **error)
+{
+  return garrow_int8_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_int8_array_builder_append_value:
+ * @builder: A #GArrowInt8ArrayBuilder.
+ * @value: An int8 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder,
+                                       gint8 value,
+                                       GError **error)
+{
+  return garrow_array_builder_append_value<arrow::Int8Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[int8-array-builder][append-value]");
+}
+
+/**
+ * garrow_int8_array_builder_append_values:
+ * @builder: A #GArrowInt8ArrayBuilder.
+ * @values: (array length=values_length): The array of int8.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder,
+                                        const gint8 *values,
+                                        gint64 values_length,
+                                        const gboolean *is_valids,
+                                        gint64 is_valids_length,
+                                        GError **error)
+{
+  return garrow_array_builder_append_values<arrow::Int8Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[int8-array-builder][append-values]");
+}
+
+/**
+ * garrow_int8_array_builder_append_null: (skip)
+ * @builder: A #GArrowInt8ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_int8_array_builder_append_null(GArrowInt8ArrayBuilder *builder,
+                                      GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_int8_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowInt8ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder,
+                                       gint64 n,
+                                       GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowUInt8ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowUInt8ArrayBuilder,
+              garrow_uint8_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_uint8_array_builder_init(GArrowUInt8ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_uint8_array_builder_class_init(GArrowUInt8ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_uint8_array_builder_new:
+ *
+ * Returns: A newly created #GArrowUInt8ArrayBuilder.
+ */
+GArrowUInt8ArrayBuilder *
+garrow_uint8_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::uint8(),
+                                          NULL,
+                                          "[uint8-array-builder][new]");
+  return GARROW_UINT8_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_uint8_array_builder_append:
+ * @builder: A #GArrowUInt8ArrayBuilder.
+ * @value: An uint8 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_uint8_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_uint8_array_builder_append_value() instead.
+ */
+gboolean
+garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder,
+                                  guint8 value,
+                                  GError **error)
+{
+  return garrow_uint8_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_uint8_array_builder_append_value:
+ * @builder: A #GArrowUInt8ArrayBuilder.
+ * @value: An uint8 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder,
+                                        guint8 value,
+                                        GError **error)
+{
+  return garrow_array_builder_append_value<arrow::UInt8Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[uint8-array-builder][append-value]");
+}
+
+/**
+ * garrow_uint8_array_builder_append_values:
+ * @builder: A #GArrowUInt8ArrayBuilder.
+ * @values: (array length=values_length): The array of uint8.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder,
+                                         const guint8 *values,
+                                         gint64 values_length,
+                                         const gboolean *is_valids,
+                                         gint64 is_valids_length,
+                                         GError **error)
+{
+  return garrow_array_builder_append_values<arrow::UInt8Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[uint8-array-builder][append-values]");
+}
+
+/**
+ * garrow_uint8_array_builder_append_null: (skip)
+ * @builder: A #GArrowUInt8ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_uint8_array_builder_append_null(GArrowUInt8ArrayBuilder *builder,
+                                       GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_uint8_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowUInt8ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder,
+                                        gint64 n,
+                                        GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowInt16ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowInt16ArrayBuilder,
+              garrow_int16_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_int16_array_builder_init(GArrowInt16ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_int16_array_builder_class_init(GArrowInt16ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_int16_array_builder_new:
+ *
+ * Returns: A newly created #GArrowInt16ArrayBuilder.
+ */
+GArrowInt16ArrayBuilder *
+garrow_int16_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::int16(),
+                                          NULL,
+                                          "[int16-array-builder][new]");
+  return GARROW_INT16_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_int16_array_builder_append:
+ * @builder: A #GArrowInt16ArrayBuilder.
+ * @value: An int16 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_int16_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_int16_array_builder_append_value() instead.
+ */
+gboolean
+garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder,
+                                  gint16 value,
+                                  GError **error)
+{
+  return garrow_int16_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_int16_array_builder_append_value:
+ * @builder: A #GArrowInt16ArrayBuilder.
+ * @value: An int16 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder,
+                                        gint16 value,
+                                        GError **error)
+{
+  return garrow_array_builder_append_value<arrow::Int16Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[int16-array-builder][append-value]");
+}
+
+/**
+ * garrow_int16_array_builder_append_values:
+ * @builder: A #GArrowInt16ArrayBuilder.
+ * @values: (array length=values_length): The array of int16.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder,
+                                         const gint16 *values,
+                                         gint64 values_length,
+                                         const gboolean *is_valids,
+                                         gint64 is_valids_length,
+                                         GError **error)
+{
+  return garrow_array_builder_append_values<arrow::Int16Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[int16-array-builder][append-values]");
+}
+
+/**
+ * garrow_int16_array_builder_append_null: (skip)
+ * @builder: A #GArrowInt16ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_int16_array_builder_append_null(GArrowInt16ArrayBuilder *builder,
+                                       GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_int16_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowInt16ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder,
+                                        gint64 n,
+                                        GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowUInt16ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowUInt16ArrayBuilder,
+              garrow_uint16_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_uint16_array_builder_init(GArrowUInt16ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_uint16_array_builder_class_init(GArrowUInt16ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_uint16_array_builder_new:
+ *
+ * Returns: A newly created #GArrowUInt16ArrayBuilder.
+ */
+GArrowUInt16ArrayBuilder *
+garrow_uint16_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::uint16(),
+                                          NULL,
+                                          "[uint16-array-builder][new]");
+  return GARROW_UINT16_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_uint16_array_builder_append:
+ * @builder: A #GArrowUInt16ArrayBuilder.
+ * @value: An uint16 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_uint16_array_builder_append_value() with the
+ * same arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_uint16_array_builder_append_value() instead.
+ */
+gboolean
+garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder,
+                                   guint16 value,
+                                   GError **error)
+{
+  return garrow_uint16_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_uint16_array_builder_append_value:
+ * @builder: A #GArrowUInt16ArrayBuilder.
+ * @value: An uint16 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder,
+                                         guint16 value,
+                                         GError **error)
+{
+  return garrow_array_builder_append_value<arrow::UInt16Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[uint16-array-builder][append-value]");
+}
+
+/**
+ * garrow_uint16_array_builder_append_values:
+ * @builder: A #GArrowUInt16ArrayBuilder.
+ * @values: (array length=values_length): The array of uint16.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder,
+                                          const guint16 *values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  return garrow_array_builder_append_values<arrow::UInt16Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[uint16-array-builder][append-values]");
+}
+
+/**
+ * garrow_uint16_array_builder_append_null: (skip)
+ * @builder: A #GArrowUInt16ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_uint16_array_builder_append_null(GArrowUInt16ArrayBuilder *builder,
+                                        GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_uint16_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowUInt16ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder,
+                                         gint64 n,
+                                         GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowInt32ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowInt32ArrayBuilder,
+              garrow_int32_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_int32_array_builder_init(GArrowInt32ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_int32_array_builder_class_init(GArrowInt32ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_int32_array_builder_new:
+ *
+ * Returns: A newly created #GArrowInt32ArrayBuilder.
+ */
+GArrowInt32ArrayBuilder *
+garrow_int32_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::int32(),
+                                          NULL,
+                                          "[int32-array-builder][new]");
+  return GARROW_INT32_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_int32_array_builder_append:
+ * @builder: A #GArrowInt32ArrayBuilder.
+ * @value: An int32 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_int32_array_builder_append_value() with the same
+ * arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_int32_array_builder_append_value() instead.
+ */
+gboolean
+garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder,
+                                  gint32 value,
+                                  GError **error)
+{
+  return garrow_int32_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_int32_array_builder_append_value:
+ * @builder: A #GArrowInt32ArrayBuilder.
+ * @value: An int32 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder,
+                                        gint32 value,
+                                        GError **error)
+{
+  return garrow_array_builder_append_value<arrow::Int32Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[int32-array-builder][append-value]");
+}
+
+/**
+ * garrow_int32_array_builder_append_values:
+ * @builder: A #GArrowInt32ArrayBuilder.
+ * @values: (array length=values_length): The array of int32.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder,
+                                         const gint32 *values,
+                                         gint64 values_length,
+                                         const gboolean *is_valids,
+                                         gint64 is_valids_length,
+                                         GError **error)
+{
+  return garrow_array_builder_append_values<arrow::Int32Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[int32-array-builder][append-values]");
+}
+
+/**
+ * garrow_int32_array_builder_append_null: (skip)
+ * @builder: A #GArrowInt32ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_int32_array_builder_append_null(GArrowInt32ArrayBuilder *builder,
+                                       GError **error)
+{
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_int32_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowInt32ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls. Delegates to
+ * garrow_array_builder_append_nulls().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder,
+                                        gint64 n,
+                                        GError **error)
+{
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+/* Registers the GArrowUInt32ArrayBuilder GType with
+ * GARROW_TYPE_ARRAY_BUILDER as its parent type. The instance and class
+ * init functions that G_DEFINE_TYPE() refers to follow; both have
+ * empty bodies. */
+G_DEFINE_TYPE(GArrowUInt32ArrayBuilder,
+              garrow_uint32_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+static void
+garrow_uint32_array_builder_init(GArrowUInt32ArrayBuilder *builder)
+{
+}
+
+static void
+garrow_uint32_array_builder_class_init(GArrowUInt32ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_uint32_array_builder_new:
+ *
+ * Returns: A newly created #GArrowUInt32ArrayBuilder.
+ */
+GArrowUInt32ArrayBuilder *
+garrow_uint32_array_builder_new(void)
+{
+  auto builder = garrow_array_builder_new(arrow::uint32(),
+                                          NULL,
+                                          "[uint32-array-builder][new]");
+  return GARROW_UINT32_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_uint32_array_builder_append:
+ * @builder: A #GArrowUInt32ArrayBuilder.
+ * @value: An uint32 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_uint32_array_builder_append_value() with the
+ * same arguments.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_uint32_array_builder_append_value() instead.
+ */
+gboolean
+garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder,
+                                   guint32 value,
+                                   GError **error)
+{
+  return garrow_uint32_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_uint32_array_builder_append_value:
+ * @builder: A #GArrowUInt32ArrayBuilder.
+ * @value: An uint32 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder,
+                                         guint32 value,
+                                         GError **error)
+{
+  return garrow_array_builder_append_value<arrow::UInt32Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[uint32-array-builder][append-value]");
+}
+
+/**
+ * garrow_uint32_array_builder_append_values:
+ * @builder: A #GArrowUInt32ArrayBuilder.
+ * @values: (array length=values_length): The array of uint32.
+ * @values_length: The length of `values`.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   booleans that shows whether the Nth value is valid or not. If the
+ *   Nth `is_valids` is %TRUE, the Nth `values` is a valid value.
+ *   Otherwise the Nth value is a null value.
+ * @is_valids_length: The length of `is_valids`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder,
+                                          const guint32 *values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  return garrow_array_builder_append_values<arrow::UInt32Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[uint32-array-builder][append-values]");
+}
+
+/**
+ * garrow_uint32_array_builder_append_null: (skip)
+ * @builder: A #GArrowUInt32ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to garrow_array_builder_append_null(), passing @builder as
+ * a #GArrowArrayBuilder.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_uint32_array_builder_append_null(GArrowUInt32ArrayBuilder *builder,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_uint32_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowUInt32ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder,
+                                         gint64 n,
+                                         GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowInt64ArrayBuilder GObject type; its parent type is
+ * GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowInt64ArrayBuilder,
+              garrow_int64_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_int64_array_builder_init(GArrowInt64ArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_int64_array_builder_class_init(GArrowInt64ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_int64_array_builder_new:
+ *
+ * Creates a builder for int64 arrays.
+ *
+ * Returns: A newly created #GArrowInt64ArrayBuilder.
+ */
+GArrowInt64ArrayBuilder *
+garrow_int64_array_builder_new(void)
+{
+  /* NULL is passed in the second position, so no error is reported
+   * to the caller from here. */
+  auto builder = garrow_array_builder_new(arrow::int64(),
+                                          NULL,
+                                          "[int64-array-builder][new]");
+  return GARROW_INT64_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_int64_array_builder_append:
+ * @builder: A #GArrowInt64ArrayBuilder.
+ * @value: An int64 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_int64_array_builder_append_value() instead.
+ */
+gboolean
+garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder,
+                                  gint64 value,
+                                  GError **error)
+{
+  /* All work is done by the non-deprecated append_value() variant. */
+  return garrow_int64_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_int64_array_builder_append_value:
+ * @builder: A #GArrowInt64ArrayBuilder.
+ * @value: An int64 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder,
+                                        gint64 value,
+                                        GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::Int64Builder *. */
+  return garrow_array_builder_append_value<arrow::Int64Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[int64-array-builder][append-value]");
+}
+
+/**
+ * garrow_int64_array_builder_append_values:
+ * @builder: A #GArrowInt64ArrayBuilder.
+ * @values: (array length=values_length): The array of int64.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder,
+                                         const gint64 *values,
+                                         gint64 values_length,
+                                         const gboolean *is_valids,
+                                         gint64 is_valids_length,
+                                         GError **error)
+{
+  /* The values pointer is reinterpreted as const int64_t * before it is
+   * handed to the helper.
+   * NOTE(review): presumably because gint64 and int64_t can be distinct
+   * types on some platforms -- confirm. */
+  return garrow_array_builder_append_values<arrow::Int64Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     reinterpret_cast<const int64_t *>(values),
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[int64-array-builder][append-values]");
+}
+
+/**
+ * garrow_int64_array_builder_append_null: (skip)
+ * @builder: A #GArrowInt64ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_int64_array_builder_append_null(GArrowInt64ArrayBuilder *builder,
+                                       GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_int64_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowInt64ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder,
+                                        gint64 n,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowUInt64ArrayBuilder GObject type; its parent type is
+ * GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowUInt64ArrayBuilder,
+              garrow_uint64_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_uint64_array_builder_init(GArrowUInt64ArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_uint64_array_builder_class_init(GArrowUInt64ArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_uint64_array_builder_new:
+ *
+ * Creates a builder for uint64 arrays.
+ *
+ * Returns: A newly created #GArrowUInt64ArrayBuilder.
+ */
+GArrowUInt64ArrayBuilder *
+garrow_uint64_array_builder_new(void)
+{
+  /* NULL is passed in the second position, so no error is reported
+   * to the caller from here. */
+  auto builder = garrow_array_builder_new(arrow::uint64(),
+                                          NULL,
+                                          "[uint64-array-builder][new]");
+  return GARROW_UINT64_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_uint64_array_builder_append:
+ * @builder: A #GArrowUInt64ArrayBuilder.
+ * @value: An uint64 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends @value by calling garrow_uint64_array_builder_append_value().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_uint64_array_builder_append_value() instead.
+ */
+gboolean
+garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder,
+                                   guint64 value,
+                                   GError **error)
+{
+  return garrow_uint64_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_uint64_array_builder_append_value:
+ * @builder: A #GArrowUInt64ArrayBuilder.
+ * @value: An uint64 value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder,
+                                         guint64 value,
+                                         GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::UInt64Builder *. */
+  return garrow_array_builder_append_value<arrow::UInt64Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[uint64-array-builder][append-value]");
+}
+
+/**
+ * garrow_uint64_array_builder_append_values:
+ * @builder: A #GArrowUInt64ArrayBuilder.
+ * @values: (array length=values_length): The array of uint64.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder,
+                                          const guint64 *values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  /* The values pointer is reinterpreted as const uint64_t * before it is
+   * handed to the helper.
+   * NOTE(review): presumably because guint64 and uint64_t can be distinct
+   * types on some platforms -- confirm. */
+  return garrow_array_builder_append_values<arrow::UInt64Builder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     reinterpret_cast<const uint64_t *>(values),
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[uint64-array-builder][append-values]");
+}
+
+/**
+ * garrow_uint64_array_builder_append_null: (skip)
+ * @builder: A #GArrowUInt64ArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_uint64_array_builder_append_null(GArrowUInt64ArrayBuilder *builder,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_uint64_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowUInt64ArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder,
+                                         gint64 n,
+                                         GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowFloatArrayBuilder GObject type; its parent type is
+ * GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowFloatArrayBuilder,
+              garrow_float_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_float_array_builder_init(GArrowFloatArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_float_array_builder_class_init(GArrowFloatArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_float_array_builder_new:
+ *
+ * Creates a builder for float (32-bit floating point) arrays.
+ *
+ * Returns: A newly created #GArrowFloatArrayBuilder.
+ */
+GArrowFloatArrayBuilder *
+garrow_float_array_builder_new(void)
+{
+  /* The Arrow data type used for "float" here is arrow::float32(). */
+  auto builder = garrow_array_builder_new(arrow::float32(),
+                                          NULL,
+                                          "[float-array-builder][new]");
+  return GARROW_FLOAT_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_float_array_builder_append:
+ * @builder: A #GArrowFloatArrayBuilder.
+ * @value: A float value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_float_array_builder_append_value() instead.
+ */
+gboolean
+garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder,
+                                  gfloat value,
+                                  GError **error)
+{
+  /* All work is done by the non-deprecated append_value() variant. */
+  return garrow_float_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_float_array_builder_append_value:
+ * @builder: A #GArrowFloatArrayBuilder.
+ * @value: A float value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder,
+                                        gfloat value,
+                                        GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::FloatBuilder *. */
+  return garrow_array_builder_append_value<arrow::FloatBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[float-array-builder][append-value]");
+}
+
+/**
+ * garrow_float_array_builder_append_values:
+ * @builder: A #GArrowFloatArrayBuilder.
+ * @values: (array length=values_length): The array of float.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder,
+                                         const gfloat *values,
+                                         gint64 values_length,
+                                         const gboolean *is_valids,
+                                         gint64 is_valids_length,
+                                         GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::FloatBuilder *. */
+  return garrow_array_builder_append_values<arrow::FloatBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[float-array-builder][append-values]");
+}
+
+/**
+ * garrow_float_array_builder_append_null: (skip)
+ * @builder: A #GArrowFloatArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_float_array_builder_append_null(GArrowFloatArrayBuilder *builder,
+                                       GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_float_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowFloatArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder,
+                                        gint64 n,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowDoubleArrayBuilder GObject type; its parent type is
+ * GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowDoubleArrayBuilder,
+              garrow_double_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_double_array_builder_init(GArrowDoubleArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_double_array_builder_class_init(GArrowDoubleArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_double_array_builder_new:
+ *
+ * Creates a builder for double (64-bit floating point) arrays.
+ *
+ * Returns: A newly created #GArrowDoubleArrayBuilder.
+ */
+GArrowDoubleArrayBuilder *
+garrow_double_array_builder_new(void)
+{
+  /* The Arrow data type used for "double" here is arrow::float64(). */
+  auto builder = garrow_array_builder_new(arrow::float64(),
+                                          NULL,
+                                          "[double-array-builder][new]");
+  return GARROW_DOUBLE_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_double_array_builder_append:
+ * @builder: A #GArrowDoubleArrayBuilder.
+ * @value: A double value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends @value by calling garrow_double_array_builder_append_value().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_double_array_builder_append_value() instead.
+ */
+gboolean
+garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder,
+                                   gdouble value,
+                                   GError **error)
+{
+  return garrow_double_array_builder_append_value(builder, value, error);
+}
+
+/**
+ * garrow_double_array_builder_append_value:
+ * @builder: A #GArrowDoubleArrayBuilder.
+ * @value: A double value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder,
+                                         gdouble value,
+                                         GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::DoubleBuilder *. */
+  return garrow_array_builder_append_value<arrow::DoubleBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     value,
+     error,
+     "[double-array-builder][append-value]");
+}
+
+/**
+ * garrow_double_array_builder_append_values:
+ * @builder: A #GArrowDoubleArrayBuilder.
+ * @values: (array length=values_length): The array of double.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ */
+gboolean
+garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder,
+                                          const gdouble *values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::DoubleBuilder *. */
+  return garrow_array_builder_append_values<arrow::DoubleBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[double-array-builder][append-values]");
+}
+
+/**
+ * garrow_double_array_builder_append_null: (skip)
+ * @builder: A #GArrowDoubleArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_double_array_builder_append_null(GArrowDoubleArrayBuilder *builder,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_double_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowDoubleArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once. It's more efficient than multiple
+ * `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.8.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder,
+                                         gint64 n,
+                                         GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowBinaryArrayBuilder GObject type; its parent type is
+ * GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowBinaryArrayBuilder,
+              garrow_binary_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_binary_array_builder_init(GArrowBinaryArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_binary_array_builder_class_init(GArrowBinaryArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_binary_array_builder_new:
+ *
+ * Creates a builder for binary arrays.
+ *
+ * Returns: A newly created #GArrowBinaryArrayBuilder.
+ */
+GArrowBinaryArrayBuilder *
+garrow_binary_array_builder_new(void)
+{
+  /* NULL is passed in the second position, so no error is reported
+   * to the caller from here. */
+  auto builder = garrow_array_builder_new(arrow::binary(),
+                                          NULL,
+                                          "[binary-array-builder][new]");
+  return GARROW_BINARY_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_binary_array_builder_append:
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @value: (array length=length): A binary value.
+ * @length: A value length.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_binary_array_builder_append_value() instead.
+ */
+gboolean
+garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder,
+                                   const guint8 *value,
+                                   gint32 length,
+                                   GError **error)
+{
+  /* Thin alias: garrow_binary_array_builder_append_value() does the work. */
+  const gboolean succeeded =
+    garrow_binary_array_builder_append_value(builder, value, length, error);
+  return succeeded;
+}
+
+/**
+ * garrow_binary_array_builder_append_value:
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @value: (array length=length): A binary value.
+ * @length: A value length.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends the @length bytes starting at @value as one element.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ */
+gboolean
+garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder,
+                                         const guint8 *value,
+                                         gint32 length,
+                                         GError **error)
+{
+  auto base_builder = GARROW_ARRAY_BUILDER(builder);
+  auto raw_builder =
+    static_cast<arrow::BinaryBuilder *>(
+      garrow_array_builder_get_raw(base_builder));
+
+  /* The Arrow status is converted into the gboolean/GError pair. */
+  auto append_status = raw_builder->Append(value, length);
+  return garrow_error_check(error,
+                            append_status,
+                            "[binary-array-builder][append-value]");
+}
+
+/**
+ * garrow_binary_array_builder_append_value_bytes:
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @value: A binary value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends the data held by @value as one element.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_binary_array_builder_append_value_bytes(GArrowBinaryArrayBuilder *builder,
+                                               GBytes *value,
+                                               GError **error)
+{
+  auto base_builder = GARROW_ARRAY_BUILDER(builder);
+  auto raw_builder =
+    static_cast<arrow::BinaryBuilder *>(
+      garrow_array_builder_get_raw(base_builder));
+
+  /* Borrow the GBytes payload; n_bytes receives its size. */
+  gsize n_bytes;
+  auto payload = g_bytes_get_data(value, &n_bytes);
+  auto payload_bytes = static_cast<const uint8_t *>(payload);
+  auto append_status = raw_builder->Append(payload_bytes, n_bytes);
+  return garrow_error_check(error,
+                            append_status,
+                            "[binary-array-builder][append-value-bytes]");
+}
+
+/**
+ * garrow_binary_array_builder_append_values:
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @values: (array length=values_length): The array of #GBytes.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_binary_array_builder_append_values(GArrowBinaryArrayBuilder *builder,
+                                          GBytes **values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::BinaryBuilder *;
+   * here the values are passed as an array of GBytes pointers. */
+  return garrow_array_builder_append_values<arrow::BinaryBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[binary-array-builder][append-values]");
+}
+
+/**
+ * garrow_binary_array_builder_append_null: (skip)
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder,
+                                        GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_binary_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowBinaryArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_binary_array_builder_append_nulls(GArrowBinaryArrayBuilder *builder,
+                                         gint64 n,
+                                         GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowLargeBinaryArrayBuilder GObject type; its parent type
+ * is GARROW_TYPE_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowLargeBinaryArrayBuilder,
+              garrow_large_binary_array_builder,
+              GARROW_TYPE_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_large_binary_array_builder_init(GArrowLargeBinaryArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_large_binary_array_builder_class_init(GArrowLargeBinaryArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_large_binary_array_builder_new:
+ *
+ * Creates a builder for large binary arrays.
+ *
+ * Returns: A newly created #GArrowLargeBinaryArrayBuilder.
+ *
+ * Since: 0.16.0
+ */
+GArrowLargeBinaryArrayBuilder *
+garrow_large_binary_array_builder_new(void)
+{
+  /* NULL is passed in the second position, so no error is reported
+   * to the caller from here. */
+  auto builder = garrow_array_builder_new(arrow::large_binary(),
+                                          NULL,
+                                          "[large-binary-array-builder][new]");
+  return GARROW_LARGE_BINARY_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_large_binary_array_builder_append_value:
+ * @builder: A #GArrowLargeBinaryArrayBuilder.
+ * @value: (array length=length): A binary value.
+ * @length: A value length.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends the @length bytes starting at @value as one element.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_large_binary_array_builder_append_value(GArrowLargeBinaryArrayBuilder *builder,
+                                               const guint8 *value,
+                                               gint64 length,
+                                               GError **error)
+{
+  /* Fetch the wrapped Arrow C++ builder and use it directly. */
+  auto arrow_builder =
+    static_cast<arrow::LargeBinaryBuilder *>(
+      garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder)));
+
+  /* The Arrow status is converted into the gboolean/GError pair. */
+  auto status = arrow_builder->Append(value, length);
+  return garrow_error_check(error,
+                            status,
+                            "[large-binary-array-builder][append-value]");
+}
+
+/**
+ * garrow_large_binary_array_builder_append_value_bytes:
+ * @builder: A #GArrowLargeBinaryArrayBuilder.
+ * @value: A binary value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_large_binary_array_builder_append_value_bytes(GArrowLargeBinaryArrayBuilder *builder,
+                                                     GBytes *value,
+                                                     GError **error)
+{
+  auto base_builder = GARROW_ARRAY_BUILDER(builder);
+  auto raw_builder =
+    static_cast<arrow::LargeBinaryBuilder *>(
+      garrow_array_builder_get_raw(base_builder));
+
+  /* Borrow the GBytes payload; n_bytes receives its size. */
+  gsize n_bytes;
+  gconstpointer payload = g_bytes_get_data(value, &n_bytes);
+  auto payload_bytes = static_cast<const uint8_t *>(payload);
+  auto append_status = raw_builder->Append(payload_bytes, n_bytes);
+  return garrow_error_check(error,
+                            append_status,
+                            "[large-binary-array-builder][append-value-bytes]");
+}
+
+/**
+ * garrow_large_binary_array_builder_append_values:
+ * @builder: A #GArrowLargeBinaryArrayBuilder.
+ * @values: (array length=values_length): The array of #GBytes.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends several values in a single call, which is more efficient
+ * than repeated `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_large_binary_array_builder_append_values(GArrowLargeBinaryArrayBuilder *builder,
+                                                GBytes **values,
+                                                gint64 values_length,
+                                                const gboolean *is_valids,
+                                                gint64 is_valids_length,
+                                                GError **error)
+{
+  auto base_builder = GARROW_ARRAY_BUILDER(builder);
+  return garrow_array_builder_append_values<arrow::LargeBinaryBuilder *>
+    (base_builder,
+     values, values_length,
+     is_valids, is_valids_length,
+     error,
+     "[large-binary-array-builder][append-values]");
+}
+
+/**
+ * garrow_large_binary_array_builder_append_null: (skip)
+ * @builder: A #GArrowLargeBinaryArrayBuilder.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_null() instead.
+ */
+gboolean
+garrow_large_binary_array_builder_append_null(GArrowLargeBinaryArrayBuilder *builder,
+                                              GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
+}
+
+/**
+ * garrow_large_binary_array_builder_append_nulls: (skip)
+ * @builder: A #GArrowLargeBinaryArrayBuilder.
+ * @n: The number of null values to be appended.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple nulls at once.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ *
+ * Deprecated: 3.0.0:
+ *   Use garrow_array_builder_append_nulls() instead.
+ */
+gboolean
+garrow_large_binary_array_builder_append_nulls(GArrowLargeBinaryArrayBuilder *builder,
+                                               gint64 n,
+                                               GError **error)
+{
+  /* Forward to the generic array builder implementation. */
+  return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder),
+                                           n,
+                                           error);
+}
+
+
+/* Define the GArrowStringArrayBuilder GObject type. Unlike the builders
+ * above, its parent type is GARROW_TYPE_BINARY_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowStringArrayBuilder,
+              garrow_string_array_builder,
+              GARROW_TYPE_BINARY_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_string_array_builder_init(GArrowStringArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_string_array_builder_class_init(GArrowStringArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_string_array_builder_new:
+ *
+ * Creates a builder for UTF-8 string arrays.
+ *
+ * Returns: A newly created #GArrowStringArrayBuilder.
+ */
+GArrowStringArrayBuilder *
+garrow_string_array_builder_new(void)
+{
+  /* The Arrow data type used for strings here is arrow::utf8(). */
+  auto builder = garrow_array_builder_new(arrow::utf8(),
+                                          NULL,
+                                          "[string-array-builder][new]");
+  return GARROW_STRING_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_string_array_builder_append:
+ * @builder: A #GArrowStringArrayBuilder.
+ * @value: A string value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_string_array_builder_append_string() instead.
+ */
+gboolean
+garrow_string_array_builder_append(GArrowStringArrayBuilder *builder,
+                                   const gchar *value,
+                                   GError **error)
+{
+  /* Thin alias: garrow_string_array_builder_append_string() does the work. */
+  const gboolean succeeded =
+    garrow_string_array_builder_append_string(builder, value, error);
+  return succeeded;
+}
+
+/**
+ * garrow_string_array_builder_append_value: (skip)
+ * @builder: A #GArrowStringArrayBuilder.
+ * @value: A string value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends @value by calling garrow_string_array_builder_append_string().
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.12.0
+ *
+ * Deprecated: 1.0.0:
+ *   Use garrow_string_array_builder_append_string() instead.
+ */
+gboolean
+garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder,
+                                         const gchar *value,
+                                         GError **error)
+{
+  const gboolean succeeded =
+    garrow_string_array_builder_append_string(builder, value, error);
+  return succeeded;
+}
+
+/**
+ * garrow_string_array_builder_append_string:
+ * @builder: A #GArrowStringArrayBuilder.
+ * @value: A string value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Appends @value as one element. The length is measured with strlen(),
+ * so @value must be a NUL-terminated string.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_string_array_builder_append_string(GArrowStringArrayBuilder *builder,
+                                          const gchar *value,
+                                          GError **error)
+{
+  auto base_builder = GARROW_ARRAY_BUILDER(builder);
+  auto raw_builder =
+    static_cast<arrow::StringBuilder *>(
+      garrow_array_builder_get_raw(base_builder));
+
+  /* The byte length is narrowed to gint32 before it is handed to Arrow. */
+  const auto value_length = static_cast<gint32>(strlen(value));
+  auto append_status = raw_builder->Append(value, value_length);
+  return garrow_error_check(error,
+                            append_status,
+                            "[string-array-builder][append-string]");
+}
+
+/**
+ * garrow_string_array_builder_append_values: (skip)
+ * @builder: A #GArrowStringArrayBuilder.
+ * @values: (array length=values_length): The array of strings.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.10.0
+ *
+ * Deprecated: 1.0.0:
+ *   Use garrow_string_array_builder_append_strings() instead.
+ */
+gboolean
+garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder,
+                                          const gchar **values,
+                                          gint64 values_length,
+                                          const gboolean *is_valids,
+                                          gint64 is_valids_length,
+                                          GError **error)
+{
+  /* All arguments are passed through unchanged to append_strings(). */
+  return garrow_string_array_builder_append_strings(builder,
+                                                    values,
+                                                    values_length,
+                                                    is_valids,
+                                                    is_valids_length,
+                                                    error);
+}
+
+/**
+ * garrow_string_array_builder_append_strings:
+ * @builder: A #GArrowStringArrayBuilder.
+ * @values: (array length=values_length): The array of strings.
+ * @values_length: The length of @values.
+ * @is_valids: (nullable) (array length=is_valids_length): The array of
+ *   boolean that shows whether the Nth value is valid or not. If the
+ *   Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise
+ *   the Nth value is null value.
+ * @is_valids_length: The length of @is_valids.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Append multiple values at once. It's more efficient than multiple
+ * `append` and `append_null` calls.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 0.16.0
+ */
+gboolean
+garrow_string_array_builder_append_strings(GArrowStringArrayBuilder *builder,
+                                           const gchar **values,
+                                           gint64 values_length,
+                                           const gboolean *is_valids,
+                                           gint64 is_valids_length,
+                                           GError **error)
+{
+  /* Delegate to the shared helper, instantiated for arrow::StringBuilder *;
+   * here the values are passed as an array of C strings. */
+  return garrow_array_builder_append_values<arrow::StringBuilder *>
+    (GARROW_ARRAY_BUILDER(builder),
+     values,
+     values_length,
+     is_valids,
+     is_valids_length,
+     error,
+     "[string-array-builder][append-strings]");
+}
+
+
+/* Define the GArrowLargeStringArrayBuilder GObject type; its parent type
+ * is GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER. */
+G_DEFINE_TYPE(GArrowLargeStringArrayBuilder,
+              garrow_large_string_array_builder,
+              GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER)
+
+/* Instance initializer: the body is empty, nothing is set up here. */
+static void
+garrow_large_string_array_builder_init(GArrowLargeStringArrayBuilder *builder)
+{
+}
+
+/* Class initializer: the body is empty, nothing is set up here. */
+static void
+garrow_large_string_array_builder_class_init(GArrowLargeStringArrayBuilderClass *klass)
+{
+}
+
+/**
+ * garrow_large_string_array_builder_new:
+ *
+ * Creates a builder for large UTF-8 string arrays.
+ *
+ * Returns: A newly created #GArrowLargeStringArrayBuilder.
+ *
+ * Since: 0.16.0
+ */
+GArrowLargeStringArrayBuilder *
+garrow_large_string_array_builder_new(void)
+{
+  /* The Arrow data type used here is arrow::large_utf8(). */
+  auto builder = garrow_array_builder_new(arrow::large_utf8(),
+                                          NULL,
+                                          "[large-string-array-builder][new]");
+  return GARROW_LARGE_STRING_ARRAY_BUILDER(builder);
+}
+
+/**
+ * garrow_large_string_array_builder_append_string:
+ * @builder: A #GArrowLargeStringArrayBuilder.
+ * @value: A string value.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ * + * Since: 0.16.0 + */ +gboolean +garrow_large_string_array_builder_append_string(GArrowLargeStringArrayBuilder *builder, + const gchar *value, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::LargeStringBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto status = arrow_builder->Append(value); + return garrow_error_check(error, + status, + "[large-string-array-builder][append-string]"); +} + +/** + * garrow_large_string_array_builder_append_strings: + * @builder: A #GArrowLargeStringArrayBuilder. + * @values: (array length=values_length): The array of strings. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.16.0 + */ +gboolean +garrow_large_string_array_builder_append_strings(GArrowLargeStringArrayBuilder *builder, + const gchar **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::LargeStringBuilder *> + (GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[large-string-array-builder][append-strings]"); +} + + +G_DEFINE_TYPE(GArrowFixedSizeBinaryArrayBuilder, + garrow_fixed_size_binary_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_fixed_size_binary_array_builder_init( + GArrowFixedSizeBinaryArrayBuilder *builder) +{ +} + +static void +garrow_fixed_size_binary_array_builder_class_init( + GArrowFixedSizeBinaryArrayBuilderClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_array_builder_new: + * @data_type: A #GArrowFixedSizeBinaryDataType for created array. + * + * Returns: A newly created #GArrowFixedSizeBinaryArrayBuilder. + */ +GArrowFixedSizeBinaryArrayBuilder * +garrow_fixed_size_binary_array_builder_new( + GArrowFixedSizeBinaryDataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = + garrow_array_builder_new(arrow_data_type, + NULL, + "[fixed-size-binary-array-builder][new]"); + return GARROW_FIXED_SIZE_BINARY_ARRAY_BUILDER(builder); +} + +/** + * garrow_fixed_size_binary_array_builder_append_value: + * @builder: A #GArrowFixedSizeBinaryArrayBuilder. + * @value: (nullable) (array length=length): A binary value. + * @length: A value length. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_fixed_size_binary_array_builder_append_value( + GArrowFixedSizeBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error) +{ + const gchar *context = "[fixed-size-binary-array-builder][append-value]"; + auto arrow_builder = + static_cast<arrow::FixedSizeBinaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + arrow::Status status; + if (value) { + if (arrow_builder->byte_width() != length) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: value size must be <%d>: <%d>", + context, + arrow_builder->byte_width(), + length); + return FALSE; + } + status = arrow_builder->Append(value); + } else { + status = arrow_builder->AppendNull(); + } + return garrow_error_check(error, status, context); +} + +/** + * garrow_fixed_size_binary_array_builder_append_value_bytes: + * @builder: A #GArrowFixedSizeBinaryArrayBuilder. + * @value: A binary value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_fixed_size_binary_array_builder_append_value_bytes( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes *value, + GError **error) +{ + const gchar *context = "[fixed-size-binary-array-builder][append-value-bytes]"; + auto arrow_builder = + static_cast<arrow::FixedSizeBinaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + gsize size; + auto data = g_bytes_get_data(value, &size); + if (arrow_builder->byte_width() != static_cast<gint32>(size)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: value size must be <%d>: <%" G_GSIZE_FORMAT ">", + context, + arrow_builder->byte_width(), + size); + return FALSE; + } + auto status = arrow_builder->Append(static_cast<const uint8_t *>(data)); + return garrow_error_check(error, status, context); +} + +/** + * garrow_fixed_size_binary_array_builder_append_values: + * @builder: A #GArrowFixedSizeBinaryArrayBuilder. + * @values: (array length=values_length): The array of #GBytes. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_fixed_size_binary_array_builder_append_values( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values( + GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[fixed-size-binary-array-builder][append-values]", + [](guint8 *output, GBytes *value, gsize size) { + size_t data_size; + auto raw_data = g_bytes_get_data(value, &data_size); + memcpy(output, raw_data, size); + }); +} + +/** + * garrow_fixed_size_binary_array_builder_append_values_packed: + * @builder: A #GArrowFixedSizeBinaryArrayBuilder. + * @values: A #GBytes that contains multiple values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * This is more efficient than + * garrow_fixed_size_binary_array_builder_append_values(). + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_fixed_size_binary_array_builder_append_values_packed( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes *values, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::FixedSizeBinaryBuilder *> + (GARROW_ARRAY_BUILDER(builder), + values, + is_valids, + is_valids_length, + error, + "[fixed-size-binary-array-builder][append-values-packed]"); +} + + +G_DEFINE_TYPE(GArrowDate32ArrayBuilder, + garrow_date32_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_date32_array_builder_init(GArrowDate32ArrayBuilder *builder) +{ +} + +static void +garrow_date32_array_builder_class_init(GArrowDate32ArrayBuilderClass *klass) +{ +} + +/** + * garrow_date32_array_builder_new: + * + * Returns: A newly created #GArrowDate32ArrayBuilder. + * + * Since: 0.7.0 + */ +GArrowDate32ArrayBuilder * +garrow_date32_array_builder_new(void) +{ + auto builder = garrow_array_builder_new(arrow::date32(), + NULL, + "[date32-array-builder][new]"); + return GARROW_DATE32_ARRAY_BUILDER(builder); +} + +/** + * garrow_date32_array_builder_append: + * @builder: A #GArrowDate32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date32_array_builder_append_value() instead. + */ +gboolean +garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_date32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date32_array_builder_append_value: + * @builder: A #GArrowDate32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::Date32Builder *> + (GARROW_ARRAY_BUILDER(builder), + value, + error, + "[date32-array-builder][append-value]"); +} + +/** + * garrow_date32_array_builder_append_values: + * @builder: A #GArrowDate32ArrayBuilder. + * @values: (array length=values_length): The array of + * the number of days since UNIX epoch in signed 32bit integer. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, + const gint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::Date32Builder *> + (GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[date32-array-builder][append-values]"); +} + +/** + * garrow_date32_array_builder_append_null: (skip) + * @builder: A #GArrowDate32ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_date32_array_builder_append_null(GArrowDate32ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_date32_array_builder_append_nulls: (skip) + * @builder: A #GArrowDate32ArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. + */ +gboolean +garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowDate64ArrayBuilder, + garrow_date64_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_date64_array_builder_init(GArrowDate64ArrayBuilder *builder) +{ +} + +static void +garrow_date64_array_builder_class_init(GArrowDate64ArrayBuilderClass *klass) +{ +} + +/** + * garrow_date64_array_builder_new: + * + * Returns: A newly created #GArrowDate64ArrayBuilder. + * + * Since: 0.7.0 + */ +GArrowDate64ArrayBuilder * +garrow_date64_array_builder_new(void) +{ + auto builder = garrow_array_builder_new(arrow::date64(), + NULL, + "[date64-array-builder][new]"); + return GARROW_DATE64_ARRAY_BUILDER(builder); +} + +/** + * garrow_date64_array_builder_append: + * @builder: A #GArrowDate64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date64_array_builder_append_value() instead. 
+ */ +gboolean +garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_date64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date64_array_builder_append_value: + * @builder: A #GArrowDate64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::Date64Builder *> + (GARROW_ARRAY_BUILDER(builder), + value, + error, + "[date64-array-builder][append-value]"); +} + +/** + * garrow_date64_array_builder_append_values: + * @builder: A #GArrowDate64ArrayBuilder. + * @values: (array length=values_length): The array of + * the number of milliseconds since UNIX epoch in signed 64bit integer. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.8.0 + */ +gboolean +garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::Date64Builder *> + (GARROW_ARRAY_BUILDER(builder), + reinterpret_cast<const int64_t *>(values), + values_length, + is_valids, + is_valids_length, + error, + "[date64-array-builder][append-values]"); +} + +/** + * garrow_date64_array_builder_append_null: (skip) + * @builder: A #GArrowDate64ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. + */ +gboolean +garrow_date64_array_builder_append_null(GArrowDate64ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_date64_array_builder_append_nulls: (skip) + * @builder: A #GArrowDate64ArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. 
+ */ +gboolean +garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowTimestampArrayBuilder, + garrow_timestamp_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_timestamp_array_builder_init(GArrowTimestampArrayBuilder *builder) +{ +} + +static void +garrow_timestamp_array_builder_class_init(GArrowTimestampArrayBuilderClass *klass) +{ +} + +/** + * garrow_timestamp_array_builder_new: + * @data_type: A #GArrowTimestampDataType. + * + * Returns: A newly created #GArrowTimestampArrayBuilder. + * + * Since: 0.7.0 + */ +GArrowTimestampArrayBuilder * +garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + NULL, + "[timestamp-array-builder][new]"); + return GARROW_TIMESTAMP_ARRAY_BUILDER(builder); +} + +/** + * garrow_timestamp_array_builder_append: + * @builder: A #GArrowTimestampArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_timestamp_array_builder_append_value() instead. + */ +gboolean +garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_timestamp_array_builder_append_value(builder, value, error); +} + +/** + * garrow_timestamp_array_builder_append_value: + * @builder: A #GArrowTimestampArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::TimestampBuilder *> + (GARROW_ARRAY_BUILDER(builder), + value, + error, + "[timestamp-array-builder][append-value]"); +} + +/** + * garrow_timestamp_array_builder_append_values: + * @builder: A #GArrowTimestampArrayBuilder. + * @values: (array length=values_length): The array of + * the number of milliseconds since UNIX epoch in signed 64bit integer. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::TimestampBuilder *> + (GARROW_ARRAY_BUILDER(builder), + reinterpret_cast<const int64_t *>(values), + values_length, + is_valids, + is_valids_length, + error, + "[timestamp-array-builder][append-values]"); +} + +/** + * garrow_timestamp_array_builder_append_null: (skip) + * @builder: A #GArrowTimestampArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_timestamp_array_builder_append_null(GArrowTimestampArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_timestamp_array_builder_append_nulls: (skip) + * @builder: A #GArrowTimestampArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. + */ +gboolean +garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowTime32ArrayBuilder, + garrow_time32_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_time32_array_builder_init(GArrowTime32ArrayBuilder *builder) +{ +} + +static void +garrow_time32_array_builder_class_init(GArrowTime32ArrayBuilderClass *klass) +{ +} + +/** + * garrow_time32_array_builder_new: + * @data_type: A #GArrowTime32DataType. + * + * Returns: A newly created #GArrowTime32ArrayBuilder. + * + * Since: 0.7.0 + */ +GArrowTime32ArrayBuilder * +garrow_time32_array_builder_new(GArrowTime32DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + NULL, + "[time32-array-builder][new]"); + return GARROW_TIME32_ARRAY_BUILDER(builder); +} + +/** + * garrow_time32_array_builder_append: + * @builder: A #GArrowTime32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time32_array_builder_append_value() instead. + */ +gboolean +garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_time32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time32_array_builder_append_value: + * @builder: A #GArrowTime32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::Time32Builder *> + (GARROW_ARRAY_BUILDER(builder), + value, + error, + "[time32-array-builder][append-value]"); +} + +/** + * garrow_time32_array_builder_append_values: + * @builder: A #GArrowTime32ArrayBuilder. + * @values: (array length=values_length): The array of + * the number of days since UNIX epoch in signed 32bit integer. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.8.0 + */ +gboolean +garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, + const gint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::Time32Builder *> + (GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[time32-array-builder][append-values]"); +} + +/** + * garrow_time32_array_builder_append_null: (skip) + * @builder: A #GArrowTime32ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. + */ +gboolean +garrow_time32_array_builder_append_null(GArrowTime32ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_time32_array_builder_append_nulls: (skip) + * @builder: A #GArrowTime32ArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. 
+ */ +gboolean +garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowTime64ArrayBuilder, + garrow_time64_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_time64_array_builder_init(GArrowTime64ArrayBuilder *builder) +{ +} + +static void +garrow_time64_array_builder_class_init(GArrowTime64ArrayBuilderClass *klass) +{ +} + +/** + * garrow_time64_array_builder_new: + * @data_type: A #GArrowTime64DataType. + * + * Returns: A newly created #GArrowTime64ArrayBuilder. + * + * Since: 0.7.0 + */ +GArrowTime64ArrayBuilder * +garrow_time64_array_builder_new(GArrowTime64DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + NULL, + "[time64-array-builder][new]"); + return GARROW_TIME64_ARRAY_BUILDER(builder); +} + +/** + * garrow_time64_array_builder_append: + * @builder: A #GArrowTime64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time64_array_builder_append_value() instead. + */ +gboolean +garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_time64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time64_array_builder_append_value: + * @builder: A #GArrowTime64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value<arrow::Time64Builder *> + (GARROW_ARRAY_BUILDER(builder), + value, + error, + "[time64-array-builder][append-value]"); +} + +/** + * garrow_time64_array_builder_append_values: + * @builder: A #GArrowTime64ArrayBuilder. + * @values: (array length=values_length): The array of + * the number of milliseconds since UNIX epoch in signed 64bit integer. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + */ +gboolean +garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::Time64Builder *> + (GARROW_ARRAY_BUILDER(builder), + reinterpret_cast<const int64_t *>(values), + values_length, + is_valids, + is_valids_length, + error, + "[time64-array-builder][append-values]"); +} + +/** + * garrow_time64_array_builder_append_null: (skip) + * @builder: A #GArrowTime64ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.7.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_time64_array_builder_append_null(GArrowTime64ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_time64_array_builder_append_nulls: (skip) + * @builder: A #GArrowTime64ArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.8.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. + */ +gboolean +garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + + +G_DEFINE_TYPE(GArrowBinaryDictionaryArrayBuilder, + garrow_binary_dictionary_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_binary_dictionary_array_builder_init(GArrowBinaryDictionaryArrayBuilder *builder) +{ +} + +static void +garrow_binary_dictionary_array_builder_class_init(GArrowBinaryDictionaryArrayBuilderClass *klass) +{ +} + + +/** + * garrow_binary_dictionary_array_builder_new: + * + * Returns: A newly created #GArrowBinaryDictionaryArrayBuilder. + * + * Since: 2.0.0 + */ +GArrowBinaryDictionaryArrayBuilder * +garrow_binary_dictionary_array_builder_new(void) +{ + // We can use arrow:int8() for the index type of the following arrow_dict_type + // because arrow::MakeBuilder creates a dictionary builder with arrow::AdaptiveIntBuilder. 
+ auto arrow_dict_type = arrow::dictionary(arrow::int8(), arrow::binary()); + auto builder = garrow_array_builder_new(arrow_dict_type, + nullptr, + "[binary-dictionary-array-builder][new]"); + return GARROW_BINARY_DICTIONARY_ARRAY_BUILDER(builder); +} + +/** + * garrow_binary_dictionary_array_builder_append_null: (skip) + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. + */ +gboolean +garrow_binary_dictionary_array_builder_append_null(GArrowBinaryDictionaryArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_binary_dictionary_array_builder_append_value: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @value: (array length=length): A binary value. + * @length: A value length. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_append_value(GArrowBinaryDictionaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + auto status = arrow_builder->Append(value, length); + + return garrow_error_check(error, + status, + "[binary-dictionary-array-builder][append-value]"); +} + +/** + * garrow_binary_dictionary_array_builder_append_value_bytes: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @value: A binary value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_append_value_bytes(GArrowBinaryDictionaryArrayBuilder *builder, + GBytes *value, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + gsize size; + auto data = g_bytes_get_data(value, &size); + auto status = arrow_builder->Append(static_cast<const uint8_t *>(data), + size); + + return garrow_error_check(error, + status, + "[binary-dictionary-array-builder][append-value-bytes]"); +} + +/** + * garrow_binary_dictionary_array_builder_append_array: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @array: A #GArrowBinaryArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_append_array(GArrowBinaryDictionaryArrayBuilder *builder, + GArrowBinaryArray *array, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_array = garrow_array_get_raw<arrow::BinaryType>(GARROW_ARRAY(array)); + + auto status = arrow_builder->AppendArray(*arrow_array); + + return garrow_error_check(error, + status, + "[binary-dictionary-array-builder][append-binary-array]"); +} + +/** + * garrow_binary_dictionary_array_builder_append_indices: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @values: (array length=values_length): The array of indices. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * %TRUE or %FALSE that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Append dictionary indices directly without modifying the internal memo. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_append_indices(GArrowBinaryDictionaryArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + static const char *context = "[binary-dictionary-array-builder][append-indices]"; + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto append_function = [&arrow_builder]( + const gint64 *values, + gint64 values_length, + const uint8_t *valid_bytes) -> arrow::Status { + return arrow_builder->AppendIndices(values, values_length, valid_bytes); + }; + return garrow_array_builder_append_values(values, values_length, is_valids, + is_valids_length, error, context, + append_function); +} + +/** + * garrow_binary_dictionary_array_builder_get_dictionary_length: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * + * Returns: A number of entries in the dicitonary. + * + * Since: 2.0.0 + */ +gint64 garrow_binary_dictionary_array_builder_get_dictionary_length(GArrowBinaryDictionaryArrayBuilder *builder) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + return arrow_builder->dictionary_length(); +} + +/** + * garrow_binary_dictionary_array_builder_finish_delta: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @out_indices: (out): The built #GArrowArray containing indices. + * @out_delta: (out): The built #GArrowArray containing dictionary. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_finish_delta(GArrowBinaryDictionaryArrayBuilder* builder, + GArrowArray **out_indices, + GArrowArray **out_delta, + GError **error) +{ + static const char *context = "[binary-dictionary-array-builder][finish-delta]"; + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + std::shared_ptr<arrow::Array> arrow_indices, arrow_delta; + auto status = arrow_builder->FinishDelta(&arrow_indices, &arrow_delta); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + *out_indices = garrow_array_new_raw(&arrow_indices); + *out_delta = garrow_array_new_raw(&arrow_delta); + return TRUE; +} + +/** + * garrow_binary_dictionary_array_builder_insert_memo_values: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * @values: A #GArrowBinaryArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_binary_dictionary_array_builder_insert_memo_values(GArrowBinaryDictionaryArrayBuilder *builder, + GArrowBinaryArray *values, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_array = garrow_array_get_raw<arrow::BinaryType>(GARROW_ARRAY(values)); + + auto status = arrow_builder->InsertMemoValues(*arrow_array); + + return garrow_error_check(error, + status, + "[binary-dictionary-array-builder][insert-memo-values]"); +} + +/** + * garrow_binary_dictionary_array_builder_reset_full: + * @builder: A #GArrowBinaryDictionaryArrayBuilder. + * + * Reset and also clear accumulated dictionary values in memo table. 
+ * + * Since: 2.0.0 + */ +void +garrow_binary_dictionary_array_builder_reset_full(GArrowBinaryDictionaryArrayBuilder *builder) +{ + auto arrow_builder = + static_cast<arrow::BinaryDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + arrow_builder->ResetFull(); +} + + +G_DEFINE_TYPE(GArrowStringDictionaryArrayBuilder, + garrow_string_dictionary_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +static void +garrow_string_dictionary_array_builder_init(GArrowStringDictionaryArrayBuilder *builder) +{ +} + +static void +garrow_string_dictionary_array_builder_class_init(GArrowStringDictionaryArrayBuilderClass *klass) +{ +} + + +/** + * garrow_string_dictionary_array_builder_new: + * + * Returns: A newly created #GArrowStringDictionaryArrayBuilder. + * + * Since: 2.0.0 + */ +GArrowStringDictionaryArrayBuilder * +garrow_string_dictionary_array_builder_new(void) +{ + // We can use arrow:int8() for the index type of the following arrow_dict_type + // because arrow::MakeBuilder creates a dictionary builder with arrow::AdaptiveIntBuilder. + auto arrow_dict_type = arrow::dictionary(arrow::int8(), arrow::utf8()); + auto builder = garrow_array_builder_new(arrow_dict_type, + nullptr, + "[string-dictionary-array-builder][new]"); + return GARROW_STRING_DICTIONARY_ARRAY_BUILDER(builder); +} + +/** + * garrow_string_dictionary_array_builder_append_null: (skip) + * @builder: A #GArrowStringDictionaryArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_string_dictionary_array_builder_append_null(GArrowStringDictionaryArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_string_dictionary_array_builder_append_string: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * @value: A string value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_string_dictionary_array_builder_append_string(GArrowStringDictionaryArrayBuilder *builder, + const gchar *value, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + auto status = arrow_builder->Append(value, + static_cast<guint32>(strlen(value))); + + return garrow_error_check(error, + status, + "[string-dictionary-array-builder][append-string]"); +} + +/** + * garrow_string_dictionary_array_builder_append_array: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * @array: A #GArrowStringArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_string_dictionary_array_builder_append_array(GArrowStringDictionaryArrayBuilder *builder, + GArrowStringArray *array, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_array = garrow_array_get_raw<arrow::StringType>(GARROW_ARRAY(array)); + + auto status = arrow_builder->AppendArray(*arrow_array); + + return garrow_error_check(error, + status, + "[string-dictionary-array-builder][append-string-array]"); +} + +/** + * garrow_string_dictionary_array_builder_append_indices: + * @builder: A #GArrowStringDictionaryArrayBuilder. 
+ * @values: (array length=values_length): The array of indices. + * @values_length: The length of `values`. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * %TRUE or %FALSE that shows whether the Nth value is valid or not. If the + * Nth `is_valids` is %TRUE, the Nth `values` is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of `is_valids`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append dictionary indices directly without modifying the internal memo. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_string_dictionary_array_builder_append_indices(GArrowStringDictionaryArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + static const char *context = "[string-dictionary-array-builder][append-indices]"; + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto append_function = [&arrow_builder]( + const gint64 *values, + gint64 values_length, + const uint8_t *valid_bytes) -> arrow::Status { + return arrow_builder->AppendIndices(values, values_length, valid_bytes); + }; + return garrow_array_builder_append_values(values, values_length, is_valids, + is_valids_length, error, context, + append_function); +} + +/** + * garrow_string_dictionary_array_builder_get_dictionary_length: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * + * Returns: A number of entries in the dicitonary. 
+ * + * Since: 2.0.0 + */ +gint64 garrow_string_dictionary_array_builder_get_dictionary_length(GArrowStringDictionaryArrayBuilder *builder) +{ + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + return arrow_builder->dictionary_length(); +} + +/** + * garrow_string_dictionary_array_builder_finish_delta: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * @out_indices: (out): The built #GArrowArray containing indices. + * @out_delta: (out): The built #GArrowArray containing dictionary. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 2.0.0 + */ +gboolean +garrow_string_dictionary_array_builder_finish_delta(GArrowStringDictionaryArrayBuilder* builder, + GArrowArray **out_indices, + GArrowArray **out_delta, + GError **error) +{ + static const char *context = "[string-dictionary-array-builder][finish-delta]"; + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + std::shared_ptr<arrow::Array> arrow_indices, arrow_delta; + auto status = arrow_builder->FinishDelta(&arrow_indices, &arrow_delta); + if (!garrow_error_check(error, status, context)) { + return FALSE; + } + *out_indices = garrow_array_new_raw(&arrow_indices); + *out_delta = garrow_array_new_raw(&arrow_delta); + return TRUE; +} + +/** + * garrow_string_dictionary_array_builder_insert_memo_values: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * @values: A #GArrowStringArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 2.0.0 + */ +gboolean +garrow_string_dictionary_array_builder_insert_memo_values(GArrowStringDictionaryArrayBuilder *builder, + GArrowStringArray *values, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_array = garrow_array_get_raw<arrow::StringType>(GARROW_ARRAY(values)); + + auto status = arrow_builder->InsertMemoValues(*arrow_array); + + return garrow_error_check(error, + status, + "[string-dictionary-array-builder][insert-memo-values]"); +} + +/** + * garrow_string_dictionary_array_builder_reset_full: + * @builder: A #GArrowStringDictionaryArrayBuilder. + * + * Reset and also clear accumulated dictionary values in memo table. + * + * Since: 2.0.0 + */ +void +garrow_string_dictionary_array_builder_reset_full(GArrowStringDictionaryArrayBuilder *builder) +{ + auto arrow_builder = + static_cast<arrow::StringDictionaryBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + arrow_builder->ResetFull(); +} + + +typedef struct GArrowListArrayBuilderPrivate_ { + GArrowArrayBuilder *value_builder; +} GArrowListArrayBuilderPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowListArrayBuilder, + garrow_list_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +#define GARROW_LIST_ARRAY_BUILDER_GET_PRIVATE(obj) \ + static_cast<GArrowListArrayBuilderPrivate *>( \ + garrow_list_array_builder_get_instance_private( \ + GARROW_LIST_ARRAY_BUILDER(obj))) + +static void +garrow_list_array_builder_dispose(GObject *object) +{ + auto priv = GARROW_LIST_ARRAY_BUILDER_GET_PRIVATE(object); + + if (priv->value_builder) { + g_object_unref(priv->value_builder); + priv->value_builder = NULL; + } + + G_OBJECT_CLASS(garrow_list_array_builder_parent_class)->dispose(object); +} + +static void +garrow_list_array_builder_init(GArrowListArrayBuilder *builder) +{ +} + +static void +garrow_list_array_builder_class_init(GArrowListArrayBuilderClass *klass) +{ + 
GObjectClass *gobject_class; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_list_array_builder_dispose; +} + +/** + * garrow_list_array_builder_new: + * @data_type: A #GArrowListDataType for value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowListArrayBuilder. + */ +GArrowListArrayBuilder * +garrow_list_array_builder_new(GArrowListDataType *data_type, + GError **error) +{ + if (!GARROW_IS_LIST_DATA_TYPE(data_type)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[list-array-builder][new] data type must be list data type"); + return NULL; + } + + auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + error, + "[list-array-builder][new]"); + return GARROW_LIST_ARRAY_BUILDER(builder); +} + +/** + * garrow_list_array_builder_append: + * @builder: A #GArrowListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new list element. To append a new list element, you + * need to call this function then append list element values to + * `value_builder`. `value_builder` is the #GArrowArrayBuilder + * specified to constructor. You can get `value_builder` by + * garrow_list_array_builder_get_value_builder(). 
 *
 * |[<!-- language="C" -->
 * GArrowInt8ArrayBuilder *value_builder;
 * GArrowListArrayBuilder *builder;
 *
 * value_builder = garrow_int8_array_builder_new();
 * builder = garrow_list_array_builder_new(value_builder, NULL);
 *
 * // Start 0th list element: [1, 0, -1]
 * garrow_list_array_builder_append(builder, NULL);
 * garrow_int8_array_builder_append(value_builder, 1);
 * garrow_int8_array_builder_append(value_builder, 0);
 * garrow_int8_array_builder_append(value_builder, -1);
 *
 * // Start 1st list element: [-29, 29]
 * garrow_list_array_builder_append(builder, NULL);
 * garrow_int8_array_builder_append(value_builder, -29);
 * garrow_int8_array_builder_append(value_builder, 29);
 *
 * {
 *   // [[1, 0, -1], [-29, 29]]
 *   GArrowArray *array = garrow_array_builder_finish(builder);
 *   // Now, builder is needless.
 *   g_object_unref(builder);
 *   g_object_unref(value_builder);
 *
 *   // Use array...
 *   g_object_unref(array);
 * }
 * ]|
 *
 * Deprecated: 0.12.0:
 *   Use garrow_list_array_builder_append_value() instead.
 */
gboolean
garrow_list_array_builder_append(GArrowListArrayBuilder *builder,
                                 GError **error)
{
  // Deprecated entry point kept for existing callers: it only forwards
  // to garrow_list_array_builder_append_value().
  // NOTE(review): the example in the doc comment above passes
  // value_builder to garrow_list_array_builder_new(), while the
  // constructor in this file takes a GArrowListDataType. Confirm and
  // refresh the example.
  return garrow_list_array_builder_append_value(builder, error);
}

/**
 * garrow_list_array_builder_append_value:
 * @builder: A #GArrowListArrayBuilder.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Returns: %TRUE on success, %FALSE if there was an error.
 *
 * It appends a new list element. To append a new list element, you
 * need to call this function then append list element values to
 * `value_builder`. `value_builder` is the #GArrowArrayBuilder
 * specified to constructor. You can get `value_builder` by
 * garrow_list_array_builder_get_value_builder().
 *
 * |[<!-- language="C" -->
 * GArrowInt8ArrayBuilder *value_builder;
 * GArrowListArrayBuilder *builder;
 *
 * value_builder = garrow_int8_array_builder_new();
 * builder = garrow_list_array_builder_new(value_builder, NULL);
 *
 * // Start 0th list element: [1, 0, -1]
 * garrow_list_array_builder_append_value(builder, NULL);
 * garrow_int8_array_builder_append(value_builder, 1);
 * garrow_int8_array_builder_append(value_builder, 0);
 * garrow_int8_array_builder_append(value_builder, -1);
 *
 * // Start 1st list element: [-29, 29]
 * garrow_list_array_builder_append_value(builder, NULL);
 * garrow_int8_array_builder_append(value_builder, -29);
 * garrow_int8_array_builder_append(value_builder, 29);
 *
 * {
 *   // [[1, 0, -1], [-29, 29]]
 *   GArrowArray *array = garrow_array_builder_finish(builder);
 *   // Now, builder is needless.
 *   g_object_unref(builder);
 *   g_object_unref(value_builder);
 *
 *   // Use array...
 *   g_object_unref(array);
 * }
 * ]|
 *
 * Since: 0.12.0
 */
gboolean
garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder,
                                       GError **error)
{
  // NOTE(review): the example in the doc comment above passes
  // value_builder to garrow_list_array_builder_new(), while the
  // constructor in this file takes a GArrowListDataType. Confirm and
  // refresh the example.
  auto arrow_builder =
    static_cast<arrow::ListBuilder *>(
      garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder)));

  // Starts a new list slot; the element values are appended separately
  // through the value builder.
  auto status = arrow_builder->Append();
  return garrow_error_check(error, status, "[list-array-builder][append-value]");
}

/**
 * garrow_list_array_builder_append_null: (skip)
 * @builder: A #GArrowListArrayBuilder.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Returns: %TRUE on success, %FALSE if there was an error.
 *
 * It appends a new NULL element.
 *
 * Deprecated: 3.0.0:
 *   Use garrow_array_builder_append_null() instead.
 */
gboolean
garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder,
                                      GError **error)
{
  // Deprecated alias: forward to the generic array builder API.
  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
}

/**
 * garrow_list_array_builder_get_value_builder:
 * @builder: A #GArrowListArrayBuilder.
+ * + * Returns: (transfer none): The #GArrowArrayBuilder for values. + */ +GArrowArrayBuilder * +garrow_list_array_builder_get_value_builder(GArrowListArrayBuilder *builder) +{ + auto priv = GARROW_LIST_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->value_builder) { + auto arrow_builder = + static_cast<arrow::ListBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_value_builder = arrow_builder->value_builder(); + priv->value_builder = garrow_array_builder_new_raw(arrow_value_builder); + garrow_array_builder_release_ownership(priv->value_builder); + } + return priv->value_builder; +} + + +typedef struct GArrowLargeListArrayBuilderPrivate_ { + GArrowArrayBuilder *value_builder; +} GArrowLargeListArrayBuilderPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowLargeListArrayBuilder, + garrow_large_list_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +#define GARROW_LARGE_LIST_ARRAY_BUILDER_GET_PRIVATE(obj) \ + static_cast<GArrowLargeListArrayBuilderPrivate *>( \ + garrow_large_list_array_builder_get_instance_private( \ + GARROW_LARGE_LIST_ARRAY_BUILDER(obj))) + +static void +garrow_large_list_array_builder_dispose(GObject *object) +{ + auto priv = GARROW_LARGE_LIST_ARRAY_BUILDER_GET_PRIVATE(object); + + if (priv->value_builder) { + g_object_unref(priv->value_builder); + priv->value_builder = NULL; + } + + G_OBJECT_CLASS(garrow_large_list_array_builder_parent_class)->dispose(object); +} + +static void +garrow_large_list_array_builder_init(GArrowLargeListArrayBuilder *builder) +{ +} + +static void +garrow_large_list_array_builder_class_init(GArrowLargeListArrayBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_large_list_array_builder_dispose; +} + +/** + * garrow_large_list_array_builder_new: + * @data_type: A #GArrowLargeListDataType for value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowLargeListArrayBuilder. 
+ * + * Since: 0.16.0 + */ +GArrowLargeListArrayBuilder * +garrow_large_list_array_builder_new(GArrowLargeListDataType *data_type, + GError **error) +{ + if (!GARROW_IS_LARGE_LIST_DATA_TYPE(data_type)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[large-list-array-builder][new] data type must be large list data type"); + return NULL; + } + + auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + error, + "[large-list-array-builder][new]"); + return GARROW_LARGE_LIST_ARRAY_BUILDER(builder); +} + +/** + * garrow_large_list_array_builder_append_value: + * @builder: A #GArrowLargeListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new list element. To append a new list element, you + * need to call this function then append list element values to + * `value_builder`. `value_builder` is the #GArrowArrayBuilder + * specified to constructor. You can get `value_builder` by + * garrow_large_list_array_builder_get_value_builder(). + * + * Since: 0.16.0 + */ +gboolean +garrow_large_list_array_builder_append_value(GArrowLargeListArrayBuilder *builder, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::LargeListBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + auto status = arrow_builder->Append(); + return garrow_error_check(error, status, "[large-list-array-builder][append-value]"); +} + +/** + * garrow_large_list_array_builder_append_null: (skip) + * @builder: A #GArrowLargeListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new NULL element. + * + * Since: 0.16.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_large_list_array_builder_append_null(GArrowLargeListArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_large_list_array_builder_get_value_builder: + * @builder: A #GArrowLargeListArrayBuilder. + * + * Returns: (transfer none): The #GArrowArrayBuilder for values. + * + * Since: 0.16.0 + */ +GArrowArrayBuilder * +garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder) +{ + auto priv = GARROW_LARGE_LIST_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->value_builder) { + auto arrow_builder = + static_cast<arrow::LargeListBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_value_builder = arrow_builder->value_builder(); + priv->value_builder = garrow_array_builder_new_raw(arrow_value_builder); + garrow_array_builder_release_ownership(priv->value_builder); + } + return priv->value_builder; +} + + +typedef struct GArrowStructArrayBuilderPrivate_ { + GList *field_builders; +} GArrowStructArrayBuilderPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowStructArrayBuilder, + garrow_struct_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +#define GARROW_STRUCT_ARRAY_BUILDER_GET_PRIVATE(obj) \ + static_cast<GArrowStructArrayBuilderPrivate *>( \ + garrow_struct_array_builder_get_instance_private( \ + GARROW_STRUCT_ARRAY_BUILDER(obj))) + +static void +garrow_struct_array_builder_dispose(GObject *object) +{ + auto priv = GARROW_STRUCT_ARRAY_BUILDER_GET_PRIVATE(object); + + for (auto node = priv->field_builders; node; node = g_list_next(node)) { + auto field_builder = static_cast<GArrowArrayBuilder *>(node->data); + GArrowArrayBuilderPrivate *field_builder_priv; + + field_builder_priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(field_builder); + field_builder_priv->array_builder = nullptr; + g_object_unref(field_builder); + } + g_list_free(priv->field_builders); + priv->field_builders = NULL; + + 
G_OBJECT_CLASS(garrow_struct_array_builder_parent_class)->dispose(object); +} + +static void +garrow_struct_array_builder_init(GArrowStructArrayBuilder *builder) +{ +} + +static void +garrow_struct_array_builder_class_init(GArrowStructArrayBuilderClass *klass) +{ + GObjectClass *gobject_class; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_struct_array_builder_dispose; +} + +/** + * garrow_struct_array_builder_new: + * @data_type: #GArrowStructDataType for the struct. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowStructArrayBuilder. + */ +GArrowStructArrayBuilder * +garrow_struct_array_builder_new(GArrowStructDataType *data_type, + GError **error) +{ + if (!GARROW_IS_STRUCT_DATA_TYPE(data_type)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[struct-array-builder][new] data type must be struct data type"); + return NULL; + } + + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + error, + "[struct-array-builder][new]"); + return GARROW_STRUCT_ARRAY_BUILDER(builder); +} + +/** + * garrow_struct_array_builder_append: + * @builder: A #GArrowStructArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new struct element. To append a new struct element, + * you need to call this function then append struct element field + * values to all `field_builder`s. `field_value`s are the + * #GArrowArrayBuilder specified to constructor. You can get + * `field_builder` by garrow_struct_array_builder_get_field_builder() + * or garrow_struct_array_builder_get_field_builders(). + * + * |[<!-- language="C" --> + * // TODO + * ]| + * + * Deprecated: 0.12.0: + * Use garrow_struct_array_builder_append_value() instead. 
 */
gboolean
garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder,
                                   GError **error)
{
  // Deprecated entry point kept for existing callers: it only forwards
  // to garrow_struct_array_builder_append_value().
  return garrow_struct_array_builder_append_value(builder, error);
}

/**
 * garrow_struct_array_builder_append_value:
 * @builder: A #GArrowStructArrayBuilder.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Returns: %TRUE on success, %FALSE if there was an error.
 *
 * It appends a new struct element. To append a new struct element,
 * you need to call this function then append struct element field
 * values to all `field_builder`s. `field_builder`s are the
 * #GArrowArrayBuilder specified to constructor. You can get
 * `field_builder` by garrow_struct_array_builder_get_field_builder()
 * or garrow_struct_array_builder_get_field_builders().
 *
 * |[<!-- language="C" -->
 * // TODO
 * ]|
 *
 * Since: 0.12.0
 */
gboolean
garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder,
                                         GError **error)
{
  auto arrow_builder =
    static_cast<arrow::StructBuilder *>(
      garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder)));

  // Only the struct slot is appended here; the field values are
  // appended separately through the field builders.
  auto status = arrow_builder->Append();
  return garrow_error_check(error,
                            status,
                            "[struct-array-builder][append-value]");
}

/**
 * garrow_struct_array_builder_append_null: (skip)
 * @builder: A #GArrowStructArrayBuilder.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Returns: %TRUE on success, %FALSE if there was an error.
 *
 * It appends a new NULL element.
 *
 * Deprecated: 3.0.0:
 *   Use garrow_array_builder_append_null() instead.
 */
gboolean
garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder,
                                        GError **error)
{
  // Deprecated alias: forward to the generic array builder API.
  return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error);
}

/**
 * garrow_struct_array_builder_get_field_builder:
 * @builder: A #GArrowStructArrayBuilder.
 * @i: The index of the field in the struct.
+ * + * Returns: (transfer none): The #GArrowArrayBuilder for the i-th field. + */ +GArrowArrayBuilder * +garrow_struct_array_builder_get_field_builder(GArrowStructArrayBuilder *builder, + gint i) +{ + auto field_builders = garrow_struct_array_builder_get_field_builders(builder); + auto field_builder = g_list_nth_data(field_builders, i); + return static_cast<GArrowArrayBuilder *>(field_builder); +} + +/** + * garrow_struct_array_builder_get_field_builders: + * @builder: A #GArrowStructArrayBuilder. + * + * Returns: (element-type GArrowArray) (transfer none): + * The #GArrowArrayBuilder for all fields. + */ +GList * +garrow_struct_array_builder_get_field_builders(GArrowStructArrayBuilder *builder) +{ + auto priv = GARROW_STRUCT_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->field_builders) { + auto arrow_struct_builder = + static_cast<arrow::StructBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + + GList *field_builders = NULL; + for (int i = 0; i < arrow_struct_builder->num_fields(); ++i) { + auto arrow_field_builder = arrow_struct_builder->field_builder(i); + auto field_builder = garrow_array_builder_new_raw(arrow_field_builder); + field_builders = g_list_prepend(field_builders, field_builder); + } + priv->field_builders = g_list_reverse(field_builders); + } + + return priv->field_builders; +} + + +typedef struct GArrowMapArrayBuilderPrivate_ { + GArrowArrayBuilder *key_builder; + GArrowArrayBuilder *item_builder; + GArrowArrayBuilder *value_builder; +} GArrowMapArrayBuilderPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowMapArrayBuilder, + garrow_map_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +#define GARROW_MAP_ARRAY_BUILDER_GET_PRIVATE(object) \ + static_cast<GArrowMapArrayBuilderPrivate *>( \ + garrow_map_array_builder_get_instance_private( \ + GARROW_MAP_ARRAY_BUILDER(object))) + +static void +garrow_map_array_builder_dispose(GObject *object) +{ + auto priv = GARROW_MAP_ARRAY_BUILDER_GET_PRIVATE(object); + + if (priv->key_builder) 
{ + g_object_unref(priv->key_builder); + priv->key_builder = NULL; + } + + if (priv->item_builder) { + g_object_unref(priv->item_builder); + priv->item_builder = NULL; + } + + if (priv->value_builder) { + g_object_unref(priv->value_builder); + priv->value_builder = NULL; + } + + G_OBJECT_CLASS(garrow_map_array_builder_parent_class)->dispose(object); +} + +static void +garrow_map_array_builder_init(GArrowMapArrayBuilder *builder) +{ +} + +static void +garrow_map_array_builder_class_init(GArrowMapArrayBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_map_array_builder_dispose; +} + +/** + * garrow_map_array_builder_new: + * @data_type: #GArrowMapDataType for the map. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowMapArrayBuilder on success, + * %NULL on error. + * + * Since: 0.17.0 + */ +GArrowMapArrayBuilder * +garrow_map_array_builder_new(GArrowMapDataType *data_type, + GError **error) +{ + if (!GARROW_IS_MAP_DATA_TYPE(data_type)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[map-array-builder][new] data type must be map data type"); + return NULL; + } + + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + error, + "[map-array-builder][new]"); + if (builder) { + return GARROW_MAP_ARRAY_BUILDER(builder); + } else { + return NULL; + } +} + +/** + * garrow_map_array_builder_append_value: + * @builder: A #GArrowMapArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.17.0 + */ +gboolean +garrow_map_array_builder_append_value(GArrowMapArrayBuilder *builder, + GError **error) +{ + auto arrow_builder = + static_cast<arrow::MapBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + /* Append() takes no arguments here; its status is checked below. */ + auto status = arrow_builder->Append(); + return garrow::check(error, + status, + "[map-array-builder][append-value]"); +} + +/** + * garrow_map_array_builder_append_values: + * @builder: A #GArrowMapArrayBuilder. + * @offsets: (array length=offsets_length): The array of signed int. + * @offsets_length: The length of @offsets. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth value is valid. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * garrow_map_array_builder_append_value() and + * garrow_array_builder_append_null() calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_map_array_builder_append_values(GArrowMapArrayBuilder *builder, + const gint32 *offsets, + gint64 offsets_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values<arrow::MapBuilder *> + (GARROW_ARRAY_BUILDER(builder), + reinterpret_cast<const int32_t *>(offsets), /* the cast assumes gint32 and int32_t share a representation */ + offsets_length, + is_valids, + is_valids_length, + error, + "[map-array-builder][append-values]"); +} + +/** + * garrow_map_array_builder_append_null: (skip) + * @builder: A #GArrowMapArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Forwards to garrow_array_builder_append_null(). + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_map_array_builder_append_null(GArrowMapArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + +/** + * garrow_map_array_builder_append_nulls: (skip) + * @builder: A #GArrowMapArrayBuilder. + * @n: The number of null values to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple nulls at once. It's more efficient than multiple + * `append_null` calls. This function forwards to + * garrow_array_builder_append_nulls(). + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_nulls() instead. + */ +gboolean +garrow_map_array_builder_append_nulls(GArrowMapArrayBuilder *builder, + gint64 n, + GError **error) +{ + return garrow_array_builder_append_nulls(GARROW_ARRAY_BUILDER(builder), + n, + error); +} + +/** + * garrow_map_array_builder_get_key_builder: + * @builder: A #GArrowMapArrayBuilder. + * + * The wrapper object is created on the first call and cached in + * @builder; later calls return the same object. The cached reference + * is dropped when @builder is disposed. + * + * Returns: (transfer none): The #GArrowArrayBuilder for key values. + * + * Since: 0.17.0 + */ +GArrowArrayBuilder * +garrow_map_array_builder_get_key_builder(GArrowMapArrayBuilder *builder) +{ + auto priv = GARROW_MAP_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->key_builder) { /* first call: wrap the raw key builder and cache the wrapper */ + auto arrow_builder = + static_cast<arrow::MapBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_key_builder = arrow_builder->key_builder(); + priv->key_builder = garrow_array_builder_new_raw(arrow_key_builder); + garrow_array_builder_release_ownership(priv->key_builder); /* the raw builder comes from the parent map builder, so the wrapper gives up ownership */ + } + return priv->key_builder; +} + +/** + * garrow_map_array_builder_get_item_builder: + * @builder: A #GArrowMapArrayBuilder. + * + * The wrapper object is created on the first call and cached in + * @builder; later calls return the same object. + * + * Returns: (transfer none): The #GArrowArrayBuilder for item values. 
+ * + * Since: 0.17.0 + */ +GArrowArrayBuilder * +garrow_map_array_builder_get_item_builder(GArrowMapArrayBuilder *builder) +{ + auto priv = GARROW_MAP_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->item_builder) { /* first call: wrap the raw item builder and cache the wrapper */ + auto arrow_builder = + static_cast<arrow::MapBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_item_builder = arrow_builder->item_builder(); + priv->item_builder = garrow_array_builder_new_raw(arrow_item_builder); + garrow_array_builder_release_ownership(priv->item_builder); /* the raw builder comes from the parent map builder, so the wrapper gives up ownership */ + } + return priv->item_builder; +} + +/** + * garrow_map_array_builder_get_value_builder: + * @builder: A #GArrowMapArrayBuilder. + * + * The wrapper object is created on the first call and cached in + * @builder; later calls return the same object. + * + * Returns: (transfer none): The #GArrowArrayBuilder to add map entries as struct values. + * This can be used instead of garrow_map_array_builder_get_key_builder() and + * garrow_map_array_builder_get_item_builder(). You can build map entries as a list of + * struct values with this builder. + * + * Since: 0.17.0 + */ +GArrowArrayBuilder * +garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder) +{ + auto priv = GARROW_MAP_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->value_builder) { /* first call: wrap the raw value builder and cache the wrapper */ + auto arrow_builder = + static_cast<arrow::MapBuilder *>( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_value_builder = arrow_builder->value_builder(); + priv->value_builder = garrow_array_builder_new_raw(arrow_value_builder); + garrow_array_builder_release_ownership(priv->value_builder); /* the raw builder comes from the parent map builder, so the wrapper gives up ownership */ + } + return priv->value_builder; +} + + +G_DEFINE_TYPE(GArrowDecimal128ArrayBuilder, + garrow_decimal128_array_builder, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) + +static void +garrow_decimal128_array_builder_init(GArrowDecimal128ArrayBuilder *builder) +{ +} + +static void +garrow_decimal128_array_builder_class_init(GArrowDecimal128ArrayBuilderClass *klass) +{ +} + +/** + * garrow_decimal128_array_builder_new: + * @data_type: #GArrowDecimal128DataType for the decimal. 
+ * + * Returns: A newly created #GArrowDecimal128ArrayBuilder. + * + * Since: 0.10.0 + */ +GArrowDecimal128ArrayBuilder * +garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + NULL, /* no GError location: this constructor has no error parameter */ + "[decimal128-array-builder][new]"); + return GARROW_DECIMAL128_ARRAY_BUILDER(builder); +} + +/** + * garrow_decimal128_array_builder_append: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @value: A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Forwards to garrow_decimal128_array_builder_append_value(). + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_decimal128_array_builder_append_value() instead. + */ +gboolean +garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error) +{ + return garrow_decimal128_array_builder_append_value(builder, value, error); +} + +/** + * garrow_decimal128_array_builder_append_value: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @value: (nullable): A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Appends @value. If @value is %NULL, a null value is appended + * through garrow_array_builder_append_null() instead. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error) +{ + if (value) { + auto arrow_decimal = garrow_decimal128_get_raw(value); + return garrow_array_builder_append_value<arrow::Decimal128Builder *> + (GARROW_ARRAY_BUILDER(builder), + *arrow_decimal, + error, + "[decimal128-array-builder][append-value]"); + } else { /* NULL value: append a null slot */ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), + error); + } +} + +/** + * garrow_decimal128_array_builder_append_values: + * @builder: A #GArrowDecimal128ArrayBuilder. 
+ * @values: (array length=values_length): The array of #GArrowDecimal128. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal128_array_builder_append_values( + GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values( + GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[decimal128-array-builder][append-values]", + [](guint8 *output, GArrowDecimal128 *value, gsize size) { /* per-value callback; size is not used by it */ + auto arrow_decimal = garrow_decimal128_get_raw(value); + arrow_decimal->ToBytes(output); + }); +} + +/** + * garrow_decimal128_array_builder_append_null: (skip) + * @builder: A #GArrowDecimal128ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new NULL element by forwarding to + * garrow_array_builder_append_null(). + * + * Since: 0.12.0 + * + * Deprecated: 3.0.0: + * Use garrow_array_builder_append_null() instead. 
+ */ +gboolean +garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), error); +} + + +G_DEFINE_TYPE(GArrowDecimal256ArrayBuilder, + garrow_decimal256_array_builder, + GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER) + +static void +garrow_decimal256_array_builder_init(GArrowDecimal256ArrayBuilder *builder) +{ +} + +static void +garrow_decimal256_array_builder_class_init(GArrowDecimal256ArrayBuilderClass *klass) +{ +} + +/** + * garrow_decimal256_array_builder_new: + * @data_type: #GArrowDecimal256DataType for the decimal. + * + * Returns: A newly created #GArrowDecimal256ArrayBuilder. + * + * Since: 3.0.0 + */ +GArrowDecimal256ArrayBuilder * +garrow_decimal256_array_builder_new(GArrowDecimal256DataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + NULL, /* no GError location: this constructor has no error parameter */ + "[decimal256-array-builder][new]"); + return GARROW_DECIMAL256_ARRAY_BUILDER(builder); +} + +/** + * garrow_decimal256_array_builder_append_value: + * @builder: A #GArrowDecimal256ArrayBuilder. + * @value: (nullable): A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Appends @value. If @value is %NULL, a null value is appended + * through garrow_array_builder_append_null() instead. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_array_builder_append_value(GArrowDecimal256ArrayBuilder *builder, + GArrowDecimal256 *value, + GError **error) +{ + if (value) { + auto arrow_decimal = garrow_decimal256_get_raw(value); + return garrow_array_builder_append_value<arrow::Decimal256Builder *> + (GARROW_ARRAY_BUILDER(builder), + *arrow_decimal, + error, + "[decimal256-array-builder][append-value]"); + } else { /* NULL value: append a null slot */ + return garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), + error); + } +} + +/** + * garrow_decimal256_array_builder_append_values: + * @builder: A #GArrowDecimal256ArrayBuilder. 
+ * @values: (array length=values_length): The array of #GArrowDecimal256. + * @values_length: The length of @values. + * @is_valids: (nullable) (array length=is_valids_length): The array of + * boolean that shows whether the Nth value is valid or not. If the + * Nth @is_valids is %TRUE, the Nth @values is valid value. Otherwise + * the Nth value is null value. + * @is_valids_length: The length of @is_valids. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Append multiple values at once. It's more efficient than multiple + * `append` and `append_null` calls. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_array_builder_append_values( + GArrowDecimal256ArrayBuilder *builder, + GArrowDecimal256 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error) +{ + return garrow_array_builder_append_values( + GARROW_ARRAY_BUILDER(builder), + values, + values_length, + is_valids, + is_valids_length, + error, + "[decimal256-array-builder][append-values]", + [](guint8 *output, GArrowDecimal256 *value, gsize size) { /* per-value callback; size is not used by it */ + auto arrow_decimal = garrow_decimal256_get_raw(value); + arrow_decimal->ToBytes(output); + }); +} + + +G_END_DECLS + /* Wraps arrow_builder in a GObject of the given GType. When type is G_TYPE_INVALID the GType is chosen from the Arrow type id of arrow_builder (for dictionary builders, additionally from the dictionary's value type); ids without a dedicated case fall back to GARROW_TYPE_ARRAY_BUILDER. */ +GArrowArrayBuilder * +garrow_array_builder_new_raw(arrow::ArrayBuilder *arrow_builder, + GType type) +{ + if (type == G_TYPE_INVALID) { + switch (arrow_builder->type()->id()) { + case arrow::Type::type::NA: + type = GARROW_TYPE_NULL_ARRAY_BUILDER; + break; + case arrow::Type::type::BOOL: + type = GARROW_TYPE_BOOLEAN_ARRAY_BUILDER; + break; + case arrow::Type::type::UINT8: + type = GARROW_TYPE_UINT8_ARRAY_BUILDER; + break; + case arrow::Type::type::INT8: + type = GARROW_TYPE_INT8_ARRAY_BUILDER; + break; + case arrow::Type::type::UINT16: + type = GARROW_TYPE_UINT16_ARRAY_BUILDER; + break; + case arrow::Type::type::INT16: + type = GARROW_TYPE_INT16_ARRAY_BUILDER; + break; + case arrow::Type::type::UINT32: + 
type = GARROW_TYPE_UINT32_ARRAY_BUILDER; + break; + case arrow::Type::type::INT32: + type = GARROW_TYPE_INT32_ARRAY_BUILDER; + break; + case arrow::Type::type::UINT64: + type = GARROW_TYPE_UINT64_ARRAY_BUILDER; + break; + case arrow::Type::type::INT64: + type = GARROW_TYPE_INT64_ARRAY_BUILDER; + break; + case arrow::Type::type::FLOAT: + type = GARROW_TYPE_FLOAT_ARRAY_BUILDER; + break; + case arrow::Type::type::DOUBLE: + type = GARROW_TYPE_DOUBLE_ARRAY_BUILDER; + break; + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_ARRAY_BUILDER; + break; + case arrow::Type::type::LARGE_BINARY: + type = GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_ARRAY_BUILDER; + break; + case arrow::Type::type::LARGE_STRING: + type = GARROW_TYPE_LARGE_STRING_ARRAY_BUILDER; + break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER; + break; + case arrow::Type::type::DATE32: + type = GARROW_TYPE_DATE32_ARRAY_BUILDER; + break; + case arrow::Type::type::DATE64: + type = GARROW_TYPE_DATE64_ARRAY_BUILDER; + break; + case arrow::Type::type::TIMESTAMP: + type = GARROW_TYPE_TIMESTAMP_ARRAY_BUILDER; + break; + case arrow::Type::type::TIME32: + type = GARROW_TYPE_TIME32_ARRAY_BUILDER; + break; + case arrow::Type::type::TIME64: + type = GARROW_TYPE_TIME64_ARRAY_BUILDER; + break; + case arrow::Type::type::LIST: + type = GARROW_TYPE_LIST_ARRAY_BUILDER; + break; + case arrow::Type::type::LARGE_LIST: + type = GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER; + break; + case arrow::Type::type::STRUCT: + type = GARROW_TYPE_STRUCT_ARRAY_BUILDER; + break; + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_ARRAY_BUILDER; + break; + case arrow::Type::type::DECIMAL128: + type = GARROW_TYPE_DECIMAL128_ARRAY_BUILDER; + break; + case arrow::Type::type::DECIMAL256: + type = GARROW_TYPE_DECIMAL256_ARRAY_BUILDER; + break; + case arrow::Type::type::DICTIONARY: + { + auto dict_type = + 
std::static_pointer_cast<arrow::DictionaryType>(arrow_builder->type()); + switch (dict_type->value_type()->id()) { /* dictionary builders are selected by their value type */ + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_DICTIONARY_ARRAY_BUILDER; + break; + default: + type = GARROW_TYPE_ARRAY_BUILDER; + break; + } + } + break; + default: + type = GARROW_TYPE_ARRAY_BUILDER; + break; + } + } + /* Hand the raw builder to the new object through its "array-builder" property. */ + auto builder = + GARROW_ARRAY_BUILDER(g_object_new(type, + "array-builder", arrow_builder, + NULL)); + return builder; +} + /* Returns the raw arrow::ArrayBuilder pointer stored in the private data of builder. */ +arrow::ArrayBuilder * +garrow_array_builder_get_raw(GArrowArrayBuilder *builder) +{ + auto priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(builder); + return priv->array_builder; +} diff --git a/src/arrow/c_glib/arrow-glib/array-builder.h b/src/arrow/c_glib/arrow-glib/array-builder.h new file mode 100644 index 000000000..7ab7a4c49 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/array-builder.h @@ -0,0 +1,1387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/array.h> +#include <arrow-glib/decimal.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_ARRAY_BUILDER (garrow_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowArrayBuilder, + garrow_array_builder, + GARROW, + ARRAY_BUILDER, + GObject) +struct _GArrowArrayBuilderClass +{ + GObjectClass parent_class; +}; + +void garrow_array_builder_release_ownership(GArrowArrayBuilder *builder); + +GArrowDataType * +garrow_array_builder_get_value_data_type(GArrowArrayBuilder *builder); +GArrowType garrow_array_builder_get_value_type(GArrowArrayBuilder *builder); + +GArrowArray *garrow_array_builder_finish(GArrowArrayBuilder *builder, + GError **error); + +GARROW_AVAILABLE_IN_2_0 +void garrow_array_builder_reset(GArrowArrayBuilder *builder); + +GARROW_AVAILABLE_IN_2_0 +gint64 garrow_array_builder_get_capacity(GArrowArrayBuilder *builder); +GARROW_AVAILABLE_IN_2_0 +gint64 garrow_array_builder_get_length(GArrowArrayBuilder *builder); +GARROW_AVAILABLE_IN_2_0 +gint64 garrow_array_builder_get_n_nulls(GArrowArrayBuilder *builder); + +GARROW_AVAILABLE_IN_2_0 +gboolean garrow_array_builder_resize(GArrowArrayBuilder *builder, + gint64 capacity, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean garrow_array_builder_reserve(GArrowArrayBuilder *builder, + gint64 additional_capacity, + GError **error); + +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_array_builder_append_null(GArrowArrayBuilder *builder, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_array_builder_append_nulls(GArrowArrayBuilder *builder, + gint64 n, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_array_builder_append_empty_value(GArrowArrayBuilder *builder, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_array_builder_append_empty_values(GArrowArrayBuilder *builder, + gint64 n, + GError **error); + +#define GARROW_TYPE_NULL_ARRAY_BUILDER (garrow_null_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNullArrayBuilder, + 
garrow_null_array_builder, + GARROW, + NULL_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowNullArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_13 +GArrowNullArrayBuilder *garrow_null_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_0_13 +gboolean garrow_null_array_builder_append_null(GArrowNullArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +GARROW_AVAILABLE_IN_0_13 +gboolean garrow_null_array_builder_append_nulls(GArrowNullArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_BOOLEAN_ARRAY_BUILDER \ + (garrow_boolean_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBooleanArrayBuilder, + garrow_boolean_array_builder, + GARROW, + BOOLEAN_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowBooleanArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowBooleanArrayBuilder *garrow_boolean_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) +gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error); +gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, + const gboolean *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_boolean_array_builder_append_null(GArrowBooleanArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean 
garrow_boolean_array_builder_append_nulls(GArrowBooleanArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_INT_ARRAY_BUILDER (garrow_int_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowIntArrayBuilder, + garrow_int_array_builder, + GARROW, + INT_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowIntArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowIntArrayBuilder *garrow_int_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) +gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error); +gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_int_array_builder_append_null(GArrowIntArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_UINT_ARRAY_BUILDER (garrow_uint_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUIntArrayBuilder, + garrow_uint_array_builder, + GARROW, + UINT_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowUIntArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowUIntArrayBuilder *garrow_uint_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) +gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, + guint64 value, + GError 
**error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, + guint64 value, + GError **error); +gboolean garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, + const guint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_uint_array_builder_append_null(GArrowUIntArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_INT8_ARRAY_BUILDER (garrow_int8_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt8ArrayBuilder, + garrow_int8_array_builder, + GARROW, + INT8_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowInt8ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowInt8ArrayBuilder *garrow_int8_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) +gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error); +gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, + const gint8 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_int8_array_builder_append_null(GArrowInt8ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean 
garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_UINT8_ARRAY_BUILDER (garrow_uint8_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt8ArrayBuilder, + garrow_uint8_array_builder, + GARROW, + UINT8_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowUInt8ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowUInt8ArrayBuilder *garrow_uint8_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) +gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error); +gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, + const guint8 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_uint8_array_builder_append_null(GArrowUInt8ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_INT16_ARRAY_BUILDER (garrow_int16_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt16ArrayBuilder, + garrow_int16_array_builder, + GARROW, + INT16_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowInt16ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowInt16ArrayBuilder *garrow_int16_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int16_array_builder_append_value) +gboolean 
garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error); +gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, + const gint16 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_int16_array_builder_append_null(GArrowInt16ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_UINT16_ARRAY_BUILDER \ + (garrow_uint16_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt16ArrayBuilder, + garrow_uint16_array_builder, + GARROW, + UINT16_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowUInt16ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowUInt16ArrayBuilder *garrow_uint16_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) +gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error); +gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, + const guint16 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean 
garrow_uint16_array_builder_append_null(GArrowUInt16ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_INT32_ARRAY_BUILDER (garrow_int32_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt32ArrayBuilder, + garrow_int32_array_builder, + GARROW, + INT32_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowInt32ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowInt32ArrayBuilder *garrow_int32_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value) +gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error); +gboolean garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, + const gint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_int32_array_builder_append_null(GArrowInt32ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_UINT32_ARRAY_BUILDER \ + (garrow_uint32_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt32ArrayBuilder, + garrow_uint32_array_builder, + GARROW, + UINT32_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowUInt32ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowUInt32ArrayBuilder 
*garrow_uint32_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value) +gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error); +gboolean garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, + const guint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_uint32_array_builder_append_null(GArrowUInt32ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_INT64_ARRAY_BUILDER (garrow_int64_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt64ArrayBuilder, + garrow_int64_array_builder, + GARROW, + INT64_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowInt64ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowInt64ArrayBuilder *garrow_int64_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value) +gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error); +gboolean garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef 
GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_int64_array_builder_append_null(GArrowInt64ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_UINT64_ARRAY_BUILDER \ + (garrow_uint64_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt64ArrayBuilder, + garrow_uint64_array_builder, + GARROW, + UINT64_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowUInt64ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowUInt64ArrayBuilder *garrow_uint64_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value) +gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error); +gboolean garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, + const guint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_uint64_array_builder_append_null(GArrowUInt64ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_FLOAT_ARRAY_BUILDER (garrow_float_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFloatArrayBuilder, + garrow_float_array_builder, + GARROW, + FLOAT_ARRAY_BUILDER, + GArrowArrayBuilder) +struct 
_GArrowFloatArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowFloatArrayBuilder *garrow_float_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value) +gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error); +gboolean garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, + const gfloat *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_float_array_builder_append_null(GArrowFloatArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_DOUBLE_ARRAY_BUILDER \ + (garrow_double_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDoubleArrayBuilder, + garrow_double_array_builder, + GARROW, + DOUBLE_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowDoubleArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowDoubleArrayBuilder *garrow_double_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value) +gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error); +gboolean garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, + const gdouble *values, + 
gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_double_array_builder_append_null(GArrowDoubleArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_BINARY_ARRAY_BUILDER \ + (garrow_binary_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryArrayBuilder, + garrow_binary_array_builder, + GARROW, + BINARY_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowBinaryArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowBinaryArrayBuilder *garrow_binary_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value) +gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_binary_array_builder_append_value_bytes(GArrowBinaryArrayBuilder *builder, + GBytes *value, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_binary_array_builder_append_values(GArrowBinaryArrayBuilder *builder, + GBytes **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +GARROW_AVAILABLE_IN_0_16 +gboolean 
garrow_binary_array_builder_append_nulls(GArrowBinaryArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER \ + (garrow_large_binary_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArrayBuilder, + garrow_large_binary_array_builder, + GARROW, + LARGE_BINARY_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowLargeBinaryArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeBinaryArrayBuilder *garrow_large_binary_array_builder_new(void); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_binary_array_builder_append_value(GArrowLargeBinaryArrayBuilder *builder, + const guint8 *value, + gint64 length, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_binary_array_builder_append_value_bytes(GArrowLargeBinaryArrayBuilder *builder, + GBytes *value, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_binary_array_builder_append_values(GArrowLargeBinaryArrayBuilder *builder, + GBytes **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_binary_array_builder_append_null(GArrowLargeBinaryArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_binary_array_builder_append_nulls(GArrowLargeBinaryArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_STRING_ARRAY_BUILDER \ + (garrow_string_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStringArrayBuilder, + garrow_string_array_builder, + GARROW, + STRING_ARRAY_BUILDER, + GArrowBinaryArrayBuilder) +struct _GArrowStringArrayBuilderClass +{ + GArrowBinaryArrayBuilderClass parent_class; +}; + +GArrowStringArrayBuilder 
*garrow_string_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value) +gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error); +#endif +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_16_FOR(garrow_string_array_builder_append_string) +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_string_array_builder_append_string(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_16_FOR(garrow_string_array_builder_append_strings) +gboolean garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder, + const gchar **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_string_array_builder_append_strings(GArrowStringArrayBuilder *builder, + const gchar **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); + + +#define GARROW_TYPE_LARGE_STRING_ARRAY_BUILDER \ + (garrow_large_string_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArrayBuilder, + garrow_large_string_array_builder, + GARROW, + LARGE_STRING_ARRAY_BUILDER, + GArrowLargeBinaryArrayBuilder) +struct _GArrowLargeStringArrayBuilderClass +{ + GArrowLargeBinaryArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeStringArrayBuilder *garrow_large_string_array_builder_new(void); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_string_array_builder_append_string(GArrowLargeStringArrayBuilder *builder, + const gchar *value, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean 
garrow_large_string_array_builder_append_strings(GArrowLargeStringArrayBuilder *builder, + const gchar **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); + + +#define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER \ + (garrow_fixed_size_binary_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArrayBuilder, + garrow_fixed_size_binary_array_builder, + GARROW, + FIXED_SIZE_BINARY_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowFixedSizeBinaryArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowFixedSizeBinaryArrayBuilder * +garrow_fixed_size_binary_array_builder_new( + GArrowFixedSizeBinaryDataType *data_type); + +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_fixed_size_binary_array_builder_append_value( + GArrowFixedSizeBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_fixed_size_binary_array_builder_append_value_bytes( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes *value, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_fixed_size_binary_array_builder_append_values( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_fixed_size_binary_array_builder_append_values_packed( + GArrowFixedSizeBinaryArrayBuilder *builder, + GBytes *values, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); + +#define GARROW_TYPE_DATE32_ARRAY_BUILDER \ + (garrow_date32_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate32ArrayBuilder, + garrow_date32_array_builder, + GARROW, + DATE32_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowDate32ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowDate32ArrayBuilder *garrow_date32_array_builder_new(void); + +#ifndef 
GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value) +gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error); +gboolean garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, + const gint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_date32_array_builder_append_null(GArrowDate32ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_DATE64_ARRAY_BUILDER \ + (garrow_date64_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate64ArrayBuilder, + garrow_date64_array_builder, + GARROW, + DATE64_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowDate64ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowDate64ArrayBuilder *garrow_date64_array_builder_new(void); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value) +gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error); +gboolean garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED 
+GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_date64_array_builder_append_null(GArrowDate64ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_TIMESTAMP_ARRAY_BUILDER \ + (garrow_timestamp_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArrayBuilder, + garrow_timestamp_array_builder, + GARROW, + TIMESTAMP_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowTimestampArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowTimestampArrayBuilder * +garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value) +gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error); +gboolean garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_timestamp_array_builder_append_null(GArrowTimestampArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_TIME32_ARRAY_BUILDER \ + (garrow_time32_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime32ArrayBuilder, + garrow_time32_array_builder, + GARROW, + 
TIME32_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowTime32ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowTime32ArrayBuilder *garrow_time32_array_builder_new(GArrowTime32DataType *data_type); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value) +gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error); +gboolean garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, + const gint32 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_time32_array_builder_append_null(GArrowTime32ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_TIME64_ARRAY_BUILDER \ + (garrow_time64_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime64ArrayBuilder, + garrow_time64_array_builder, + GARROW, + TIME64_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowTime64ArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowTime64ArrayBuilder *garrow_time64_array_builder_new(GArrowTime64DataType *data_type); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value) +gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error); 
+gboolean garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_time64_array_builder_append_null(GArrowTime64ArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +gboolean garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, + gint64 n, + GError **error); +#endif + + +#define GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER (garrow_binary_dictionary_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryDictionaryArrayBuilder, + garrow_binary_dictionary_array_builder, + GARROW, + BINARY_DICTIONARY_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowBinaryDictionaryArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_2_0 +GArrowBinaryDictionaryArrayBuilder * +garrow_binary_dictionary_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_append_null(GArrowBinaryDictionaryArrayBuilder *builder, + GError **error); +#endif +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_append_value(GArrowBinaryDictionaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_append_value_bytes(GArrowBinaryDictionaryArrayBuilder *builder, + GBytes *value, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_append_array(GArrowBinaryDictionaryArrayBuilder *builder, + GArrowBinaryArray *array, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean 
+garrow_binary_dictionary_array_builder_append_indices(GArrowBinaryDictionaryArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gint64 +garrow_binary_dictionary_array_builder_get_dictionary_length(GArrowBinaryDictionaryArrayBuilder *builder); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_finish_delta(GArrowBinaryDictionaryArrayBuilder* builder, + GArrowArray **out_indices, + GArrowArray **out_delta, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_binary_dictionary_array_builder_insert_memo_values(GArrowBinaryDictionaryArrayBuilder *builder, + GArrowBinaryArray *values, + GError **error); +GARROW_AVAILABLE_IN_2_0 +void +garrow_binary_dictionary_array_builder_reset_full(GArrowBinaryDictionaryArrayBuilder *builder); + + +#define GARROW_TYPE_STRING_DICTIONARY_ARRAY_BUILDER (garrow_string_dictionary_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStringDictionaryArrayBuilder, + garrow_string_dictionary_array_builder, + GARROW, + STRING_DICTIONARY_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowStringDictionaryArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_2_0 +GArrowStringDictionaryArrayBuilder * +garrow_string_dictionary_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_append_null(GArrowStringDictionaryArrayBuilder *builder, + GError **error); +#endif +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_append_string(GArrowStringDictionaryArrayBuilder *builder, + const gchar *value, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_append_array(GArrowStringDictionaryArrayBuilder *builder, + GArrowStringArray *array, + GError **error); 
+GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_append_indices(GArrowStringDictionaryArrayBuilder *builder, + const gint64 *values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gint64 +garrow_string_dictionary_array_builder_get_dictionary_length(GArrowStringDictionaryArrayBuilder *builder); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_finish_delta(GArrowStringDictionaryArrayBuilder* builder, + GArrowArray **out_indices, + GArrowArray **out_delta, + GError **error); +GARROW_AVAILABLE_IN_2_0 +gboolean +garrow_string_dictionary_array_builder_insert_memo_values(GArrowStringDictionaryArrayBuilder *builder, + GArrowStringArray *values, + GError **error); +GARROW_AVAILABLE_IN_2_0 +void +garrow_string_dictionary_array_builder_reset_full(GArrowStringDictionaryArrayBuilder *builder); + + +#define GARROW_TYPE_LIST_ARRAY_BUILDER (garrow_list_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowListArrayBuilder, + garrow_list_array_builder, + GARROW, + LIST_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowListArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowListArrayBuilder *garrow_list_array_builder_new(GArrowListDataType *data_type, + GError **error); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value) +gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, + GError **error); +#endif + +GArrowArrayBuilder *garrow_list_array_builder_get_value_builder(GArrowListArrayBuilder *builder); + + +#define 
GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER (garrow_large_list_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeListArrayBuilder, + garrow_large_list_array_builder, + GARROW, + LARGE_LIST_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowLargeListArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeListArrayBuilder *garrow_large_list_array_builder_new(GArrowLargeListDataType *data_type, + GError **error); +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_list_array_builder_append_value(GArrowLargeListArrayBuilder *builder, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_large_list_array_builder_append_null(GArrowLargeListArrayBuilder *builder, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_16 +GArrowArrayBuilder *garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder); + + +#define GARROW_TYPE_STRUCT_ARRAY_BUILDER \ + (garrow_struct_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStructArrayBuilder, + garrow_struct_array_builder, + GARROW, + STRUCT_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowStructArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GArrowStructArrayBuilder *garrow_struct_array_builder_new(GArrowStructDataType *data_type, + GError **error); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value) +gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +gboolean garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, + GError **error); +#endif + +GArrowArrayBuilder 
*garrow_struct_array_builder_get_field_builder(GArrowStructArrayBuilder *builder, + gint i); +GList *garrow_struct_array_builder_get_field_builders(GArrowStructArrayBuilder *builder); + + +#define GARROW_TYPE_MAP_ARRAY_BUILDER \ + (garrow_map_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMapArrayBuilder, + garrow_map_array_builder, + GARROW, + MAP_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowMapArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowMapArrayBuilder *garrow_map_array_builder_new(GArrowMapDataType *data_type, + GError **error); +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_map_array_builder_append_value(GArrowMapArrayBuilder *builder, + GError **error); +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_map_array_builder_append_values(GArrowMapArrayBuilder *builder, + const gint32 *offsets, + gint64 offsets_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_map_array_builder_append_null(GArrowMapArrayBuilder *builder, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_map_array_builder_append_nulls(GArrowMapArrayBuilder *builder, + gint64 n, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_17 +GArrowArrayBuilder * +garrow_map_array_builder_get_key_builder(GArrowMapArrayBuilder *builder); +GARROW_AVAILABLE_IN_0_17 +GArrowArrayBuilder * +garrow_map_array_builder_get_item_builder(GArrowMapArrayBuilder *builder); +GARROW_AVAILABLE_IN_0_17 +GArrowArrayBuilder * +garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder); + + +#define GARROW_TYPE_DECIMAL128_ARRAY_BUILDER (garrow_decimal128_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128ArrayBuilder, + garrow_decimal128_array_builder, + GARROW, + 
DECIMAL128_ARRAY_BUILDER, + GArrowFixedSizeBinaryArrayBuilder) +struct _GArrowDecimal128ArrayBuilderClass +{ + GArrowFixedSizeBinaryArrayBuilderClass parent_class; +}; + +GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value) +gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_decimal128_array_builder_append_values( + GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 **values, + gint64 values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error); +#endif + + +#define GARROW_TYPE_DECIMAL256_ARRAY_BUILDER (garrow_decimal256_array_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256ArrayBuilder, + garrow_decimal256_array_builder, + GARROW, + DECIMAL256_ARRAY_BUILDER, + GArrowFixedSizeBinaryArrayBuilder) +struct _GArrowDecimal256ArrayBuilderClass +{ + GArrowFixedSizeBinaryArrayBuilderClass parent_class; +}; + +GArrowDecimal256ArrayBuilder *garrow_decimal256_array_builder_new(GArrowDecimal256DataType *data_type); + +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_array_builder_append_value(GArrowDecimal256ArrayBuilder *builder, + GArrowDecimal256 *value, + GError **error); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_decimal256_array_builder_append_values( + GArrowDecimal256ArrayBuilder *builder, + GArrowDecimal256 **values, + gint64 
values_length, + const gboolean *is_valids, + gint64 is_valids_length, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/array-builder.hpp b/src/arrow/c_glib/arrow-glib/array-builder.hpp new file mode 100644 index 000000000..bcdc58fd8 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/array-builder.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.hpp> +#include <arrow-glib/array-builder.h> + +GArrowArrayBuilder *garrow_array_builder_new_raw(arrow::ArrayBuilder *arrow_builder, + GType type=G_TYPE_INVALID); +arrow::ArrayBuilder *garrow_array_builder_get_raw(GArrowArrayBuilder *builder); diff --git a/src/arrow/c_glib/arrow-glib/array.h b/src/arrow/c_glib/arrow-glib/array.h new file mode 100644 index 000000000..9a845597d --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/array.h @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/basic-array.h> +#include <arrow-glib/composite-array.h> diff --git a/src/arrow/c_glib/arrow-glib/array.hpp b/src/arrow/c_glib/arrow-glib/array.hpp new file mode 100644 index 000000000..e575c4253 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/array.hpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/composite-array.h> diff --git a/src/arrow/c_glib/arrow-glib/arrow-glib.h b/src/arrow/c_glib/arrow-glib/arrow-glib.h new file mode 100644 index 000000000..57a3508d3 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/arrow-glib.h @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +#include <arrow-glib/array.h> +#include <arrow-glib/array-builder.h> +#include <arrow-glib/chunked-array.h> +#include <arrow-glib/codec.h> +#include <arrow-glib/compute.h> +#include <arrow-glib/data-type.h> +#include <arrow-glib/datum.h> +#include <arrow-glib/enums.h> +#include <arrow-glib/error.h> +#include <arrow-glib/expression.h> +#include <arrow-glib/field.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/scalar.h> +#include <arrow-glib/schema.h> +#include <arrow-glib/table.h> +#include <arrow-glib/table-builder.h> +#include <arrow-glib/tensor.h> +#include <arrow-glib/type.h> + +#include <arrow-glib/file.h> +#include <arrow-glib/file-mode.h> +#include <arrow-glib/input-stream.h> +#include <arrow-glib/output-stream.h> +#include <arrow-glib/readable.h> +#include <arrow-glib/writable.h> +#include <arrow-glib/writable-file.h> + +#include <arrow-glib/ipc-options.h> +#include <arrow-glib/metadata-version.h> +#include <arrow-glib/reader.h> +#include <arrow-glib/writer.h> + +#include <arrow-glib/file-system.h> +#include <arrow-glib/local-file-system.h> diff --git a/src/arrow/c_glib/arrow-glib/arrow-glib.hpp b/src/arrow/c_glib/arrow-glib/arrow-glib.hpp new file mode 100644 index 000000000..621c474af --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/arrow-glib.hpp @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <arrow-glib/array.hpp> +#include <arrow-glib/array-builder.hpp> +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/codec.hpp> +#include <arrow-glib/compute.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/datum.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/expression.hpp> +#include <arrow-glib/field.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/scalar.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table.hpp> +#include <arrow-glib/table-builder.hpp> +#include <arrow-glib/tensor.hpp> +#include <arrow-glib/type.hpp> + +#include <arrow-glib/file.hpp> +#include <arrow-glib/file-mode.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/output-stream.hpp> +#include <arrow-glib/readable.hpp> +#include <arrow-glib/writable.hpp> +#include <arrow-glib/writable-file.hpp> + +#include <arrow-glib/ipc-options.hpp> +#include <arrow-glib/metadata-version.hpp> +#include <arrow-glib/reader.hpp> +#include <arrow-glib/writer.hpp> diff --git a/src/arrow/c_glib/arrow-glib/basic-array.cpp b/src/arrow/c_glib/arrow-glib/basic-array.cpp new file mode 100644 index 000000000..e9315e7e1 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-array.cpp @@ -0,0 +1,3309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/basic-data-type.hpp> +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/decimal.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/type.hpp> + +#include <arrow/c/bridge.h> + +#include <sstream> + +G_BEGIN_DECLS + +/** + * SECTION: basic-array + * @section_id: basic-array-classes + * @title: Basic array classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowArray is a base class for all array classes such as + * #GArrowBooleanArray. + * + * All array classes are immutable. You need to use binary data or + * array builder to create a new array except #GArrowNullArray. If you + * have binary data that uses Arrow format data, you can create a new + * array with the binary data as #GArrowBuffer object. If you don't + * have binary data, you can use array builder class such as + * #GArrowBooleanArrayBuilder that creates Arrow format data + * internally and a new array from the data. + * + * #GArrowNullArray is a class for null array. It can store zero or + * more null values. You need to specify an array length to create a + * new array. + * + * #GArrowBooleanArray is a class for boolean array. It can store zero + * or more boolean data. 
If you don't have Arrow format data, you need + * to use #GArrowBooleanArrayBuilder to create a new array. + * + * #GArrowInt8Array is a class for 8-bit integer array. It can store + * zero or more 8-bit integer data. If you don't have Arrow format + * data, you need to use #GArrowInt8ArrayBuilder to create a new + * array. + * + * #GArrowUInt8Array is a class for 8-bit unsigned integer array. It + * can store zero or more 8-bit unsigned integer data. If you don't + * have Arrow format data, you need to use #GArrowUInt8ArrayBuilder to + * create a new array. + * + * #GArrowInt16Array is a class for 16-bit integer array. It can store + * zero or more 16-bit integer data. If you don't have Arrow format + * data, you need to use #GArrowInt16ArrayBuilder to create a new + * array. + * + * #GArrowUInt16Array is a class for 16-bit unsigned integer array. It + * can store zero or more 16-bit unsigned integer data. If you don't + * have Arrow format data, you need to use #GArrowUInt16ArrayBuilder + * to create a new array. + * + * #GArrowInt32Array is a class for 32-bit integer array. It can store + * zero or more 32-bit integer data. If you don't have Arrow format + * data, you need to use #GArrowInt32ArrayBuilder to create a new + * array. + * + * #GArrowUInt32Array is a class for 32-bit unsigned integer array. It + * can store zero or more 32-bit unsigned integer data. If you don't + * have Arrow format data, you need to use #GArrowUInt32ArrayBuilder + * to create a new array. + * + * #GArrowInt64Array is a class for 64-bit integer array. It can store + * zero or more 64-bit integer data. If you don't have Arrow format + * data, you need to use #GArrowInt64ArrayBuilder to create a new + * array. + * + * #GArrowUInt64Array is a class for 64-bit unsigned integer array. It + * can store zero or more 64-bit unsigned integer data. If you don't + * have Arrow format data, you need to use #GArrowUInt64ArrayBuilder + * to create a new array. 
+ * + * #GArrowFloatArray is a class for 32-bit floating point array. It + * can store zero or more 32-bit floating data. If you don't have + * Arrow format data, you need to use #GArrowFloatArrayBuilder to + * create a new array. + * + * #GArrowDoubleArray is a class for 64-bit floating point array. It + * can store zero or more 64-bit floating data. If you don't have + * Arrow format data, you need to use #GArrowDoubleArrayBuilder to + * create a new array. + * + * #GArrowBinaryArray is a class for binary array. It can store zero + * or more binary data. If you don't have Arrow format data, you need + * to use #GArrowBinaryArrayBuilder to create a new array. + * + * #GArrowLargeBinaryArray is a class for 64-bit offsets binary array. + * It can store zero or more binary data. If you don't have Arrow + * format data, you need to use #GArrowLargeBinaryArrayBuilder to + * create a new array. + * + * #GArrowStringArray is a class for UTF-8 encoded string array. It + * can store zero or more UTF-8 encoded string data. If you don't have + * Arrow format data, you need to use #GArrowStringArrayBuilder to + * create a new array. + * + * #GArrowLargeStringArray is a class for 64-bit offsets UTF-8 + * encoded string array. It can store zero or more UTF-8 encoded + * string data. If you don't have Arrow format data, you need to + * use #GArrowLargeStringArrayBuilder to create a new array. + * + * #GArrowFixedSizeBinaryArray is a class for fixed size binary array. + * It can store zero or more fixed size binary data. If you don't have + * Arrow format data, you need to use + * #GArrowFixedSizeBinaryArrayBuilder to create a new array. + * + * #GArrowDate32Array is a class for the number of days since UNIX + * epoch in 32-bit signed integer array. It can store zero or more + * date data. If you don't have Arrow format data, you need to use + * #GArrowDate32ArrayBuilder to create a new array. 
+ * + * #GArrowDate64Array is a class for the number of milliseconds since + * UNIX epoch in 64-bit signed integer array. It can store zero or + * more date data. If you don't have Arrow format data, you need to + * use #GArrowDate64ArrayBuilder to create a new array. + * + * #GArrowTimestampArray is a class for the number of + * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in + * 64-bit signed integer array. It can store zero or more timestamp + * data. If you don't have Arrow format data, you need to use + * #GArrowTimestampArrayBuilder to create a new array. + * + * #GArrowTime32Array is a class for the number of seconds or + * milliseconds since midnight in 32-bit signed integer array. It can + * store zero or more time data. If you don't have Arrow format data, + * you need to use #GArrowTime32ArrayBuilder to create a new array. + * + * #GArrowTime64Array is a class for the number of microseconds or + * nanoseconds since midnight in 64-bit signed integer array. It can + * store zero or more time data. If you don't have Arrow format data, + * you need to use #GArrowTime64ArrayBuilder to create a new array. + * + * #GArrowDecimal128Array is a class for 128-bit decimal array. It can + * store zero or more 128-bit decimal data. If you don't have Arrow + * format data, you need to use #GArrowDecimal128ArrayBuilder to + * create a new array. + * + * #GArrowDecimal256Array is a class for 256-bit decimal array. It can + * store zero or more 256-bit decimal data. If you don't have Arrow + * format data, you need to use #GArrowDecimal256ArrayBuilder to + * create a new array. + * + * #GArrowExtensionArray is a base class for array of user-defined + * extension types. 
+ */ + +typedef struct GArrowEqualOptionsPrivate_ { + gboolean approx; + arrow::EqualOptions options; +} GArrowEqualOptionsPrivate; + +enum { + PROP_APPROX = 1, + PROP_NANS_EQUAL, + PROP_ABSOLUTE_TOLERANCE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowEqualOptions, + garrow_equal_options, + G_TYPE_OBJECT) + +#define GARROW_EQUAL_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowEqualOptionsPrivate *>( \ + garrow_equal_options_get_instance_private( \ + GARROW_EQUAL_OPTIONS(object))) + +static void +garrow_equal_options_finalize(GObject *object) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + priv->options.~EqualOptions(); + G_OBJECT_CLASS(garrow_equal_options_parent_class)->finalize(object); +} + +static void +garrow_equal_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_APPROX: + priv->approx = g_value_get_boolean(value); + break; + case PROP_NANS_EQUAL: + priv->options = priv->options.nans_equal(g_value_get_boolean(value)); + break; + case PROP_ABSOLUTE_TOLERANCE: + priv->options = priv->options.atol(g_value_get_double(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_equal_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_APPROX: + g_value_set_boolean(value, priv->approx); + break; + case PROP_NANS_EQUAL: + g_value_set_boolean(value, priv->options.nans_equal()); + break; + case PROP_ABSOLUTE_TOLERANCE: + g_value_set_double(value, priv->options.atol()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_equal_options_init(GArrowEqualOptions *object) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object); + priv->approx = FALSE; + 
new(&priv->options) arrow::EqualOptions; + priv->options = arrow::EqualOptions::Defaults(); +} + +static void +garrow_equal_options_class_init(GArrowEqualOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_equal_options_finalize; + gobject_class->set_property = garrow_equal_options_set_property; + gobject_class->get_property = garrow_equal_options_get_property; + + auto options = arrow::EqualOptions::Defaults(); + GParamSpec *spec; + /** + * GArrowEqualOptions:approx: + * + * Whether or not approximate comparison is used. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("approx", + "Approx", + "Whether or not approximate comparison is used", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_APPROX, spec); + + /** + * GArrowEqualOptions:nans-equal: + * + * Whether or not NaNs are considered equal. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("nans-equal", + "NaNs equal", + "Whether or not NaNs are considered equal", + options.nans_equal(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_NANS_EQUAL, spec); + + /** + * GArrowEqualOptions:absolute-tolerance: + * + * The absolute tolerance for approximate comparison of + * floating-point values. + * + * Since: 5.0.0 + */ + spec = g_param_spec_double("absolute-tolerance", + "Absolute tolerance", + "The absolute tolerance for approximate comparison " + "of floating-point values", + -G_MAXDOUBLE, + G_MAXDOUBLE, + options.atol(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ABSOLUTE_TOLERANCE, spec); +} + +/** + * garrow_equal_options_new: + * + * Returns: A newly created #GArrowEqualOptions. 
+ * + * Since: 5.0.0 + */ +GArrowEqualOptions * +garrow_equal_options_new(void) +{ + auto equal_options = g_object_new(GARROW_TYPE_EQUAL_OPTIONS, NULL); + return GARROW_EQUAL_OPTIONS(equal_options); +} + +/** + * garrow_equal_options_is_approx: + * @options: A #GArrowEqualOptions. + * + * Returns: %TRUE if approximate comparison is used, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_equal_options_is_approx(GArrowEqualOptions *options) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(options); + return priv->approx; +} + + +typedef struct GArrowArrayPrivate_ { + std::shared_ptr<arrow::Array> array; + GArrowDataType *value_data_type; + GArrowBuffer *null_bitmap; + // Data for primitive array, value offsets for list array, type + // codes for union array and so on. + GArrowBuffer *buffer1; + // Data for binary array, value offsets for dense union array and so + // on. + GArrowBuffer *buffer2; + GArrowArray *parent; +} GArrowArrayPrivate; + +enum { + PROP_ARRAY = 1, + PROP_VALUE_DATA_TYPE, + PROP_NULL_BITMAP, + PROP_BUFFER1, + PROP_BUFFER2, + PROP_PARENT, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowArray, + garrow_array, + G_TYPE_OBJECT) + +#define GARROW_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowArrayPrivate *>( \ + garrow_array_get_instance_private( \ + GARROW_ARRAY(obj))) + +G_END_DECLS +template <typename T> +const typename T::c_type * +garrow_array_get_values_raw(std::shared_ptr<arrow::Array> arrow_array, + gint64 *length) +{ + auto arrow_specific_array = + std::static_pointer_cast<typename arrow::TypeTraits<T>::ArrayType>(arrow_array); + *length = arrow_specific_array->length(); + return arrow_specific_array->raw_values(); +}; +G_BEGIN_DECLS + +static void +garrow_array_dispose(GObject *object) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(object); + + if (priv->value_data_type) { + g_object_unref(priv->value_data_type); + priv->value_data_type = NULL; + } + + if (priv->null_bitmap) { + g_object_unref(priv->null_bitmap); + priv->null_bitmap = 
NULL; + } + + if (priv->buffer1) { + g_object_unref(priv->buffer1); + priv->buffer1 = NULL; + } + + if (priv->buffer2) { + g_object_unref(priv->buffer2); + priv->buffer2 = NULL; + } + + if (priv->parent) { + g_object_unref(priv->parent); + priv->parent = NULL; + } + + G_OBJECT_CLASS(garrow_array_parent_class)->dispose(object); +} + +static void +garrow_array_finalize(GObject *object) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(object); + + priv->array.~shared_ptr(); + + G_OBJECT_CLASS(garrow_array_parent_class)->finalize(object); +} + +static void +garrow_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ARRAY: + priv->array = + *static_cast<std::shared_ptr<arrow::Array> *>(g_value_get_pointer(value)); + break; + case PROP_VALUE_DATA_TYPE: + priv->value_data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + case PROP_NULL_BITMAP: + priv->null_bitmap = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_BUFFER1: + priv->buffer1 = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_BUFFER2: + priv->buffer2 = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_PARENT: + priv->parent = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE_DATA_TYPE: + g_value_set_object(value, priv->value_data_type); + break; + case PROP_NULL_BITMAP: + g_value_set_object(value, priv->null_bitmap); + break; + case PROP_BUFFER1: + g_value_set_object(value, priv->buffer1); + break; + case PROP_BUFFER2: + g_value_set_object(value, priv->buffer2); + break; + case PROP_PARENT: + g_value_set_object(value, priv->parent); + break; + 
default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_init(GArrowArray *object) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(object); + new(&priv->array) std::shared_ptr<arrow::Array>; +} + +static void +garrow_array_class_init(GArrowArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_array_dispose; + gobject_class->finalize = garrow_array_finalize; + gobject_class->set_property = garrow_array_set_property; + gobject_class->get_property = garrow_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("array", + "Array", + "The raw std::shared<arrow::Array> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ARRAY, spec); + + spec = g_param_spec_object("value-data-type", + "Value data type", + "The data type of each value", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE_DATA_TYPE, spec); + + spec = g_param_spec_object("null-bitmap", + "NULL bitmap", + "The NULL bitmap", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_NULL_BITMAP, spec); + + spec = g_param_spec_object("buffer1", + "Buffer1", + "The first buffer", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER1, spec); + + spec = g_param_spec_object("buffer2", + "Buffer2", + "The second buffer", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER2, spec); + + spec = g_param_spec_object("parent", + "Parent", + "The parent array", + GARROW_TYPE_ARRAY, + 
static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_PARENT, spec); +} + +/** + * garrow_array_import: + * @c_abi_array: (not nullable): A `struct ArrowArray *`. + * @data_type: A #GArrowDataType of the C ABI array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An imported #GArrowArray + * on success, %NULL on error. + * + * You don't need to release the passed `struct ArrowArray *`, + * even if this function reports an error. + * + * Since: 6.0.0 + */ +GArrowArray * +garrow_array_import(gpointer c_abi_array, + GArrowDataType *data_type, + GError **error) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_array_result = + arrow::ImportArray(static_cast<ArrowArray *>(c_abi_array), + arrow_data_type); + if (garrow::check(error, arrow_array_result, "[array][import]")) { + return garrow_array_new_raw(&(*arrow_array_result)); + } else { + return NULL; + } +} + +/** + * garrow_array_export: + * @array: A #GArrowArray. + * @c_abi_array: (out): Return location for a `struct ArrowArray *`. + * It should be freed with the `ArrowArray::release` callback then + * g_free() when no longer needed. + * @c_abi_schema: (out) (nullable): Return location for a + * `struct ArrowSchema *` or %NULL. + * It should be freed with the `ArrowSchema::release` callback then + * g_free() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 6.0.0 + */ +gboolean +garrow_array_export(GArrowArray *array, + gpointer *c_abi_array, + gpointer *c_abi_schema, + GError **error) +{ + const auto arrow_array = garrow_array_get_raw(array); + *c_abi_array = g_new(ArrowArray, 1); + arrow::Status status; + if (c_abi_schema) { + *c_abi_schema = g_new(ArrowSchema, 1); + status = arrow::ExportArray(*arrow_array, + static_cast<ArrowArray *>(*c_abi_array), + static_cast<ArrowSchema *>(*c_abi_schema)); + } else { + status = arrow::ExportArray(*arrow_array, + static_cast<ArrowArray *>(*c_abi_array)); + } + if (garrow::check(error, status, "[array][export]")) { + return true; + } else { + g_free(*c_abi_array); + *c_abi_array = nullptr; + if (c_abi_schema) { + g_free(*c_abi_schema); + *c_abi_schema = nullptr; + } + return false; + } +} + +/** + * garrow_array_equal: + * @array: A #GArrowArray. + * @other_array: A #GArrowArray to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_array_equal(GArrowArray *array, GArrowArray *other_array) +{ + return garrow_array_equal_options(array, other_array, NULL); +} + +/** + * garrow_array_equal_options: + * @array: A #GArrowArray. + * @other_array: A #GArrowArray to be compared. + * @options: (nullable): A #GArrowEqualOptions to custom how to compare. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. 
+ * + * Since: 5.0.0 + */ +gboolean +garrow_array_equal_options(GArrowArray *array, + GArrowArray *other_array, + GArrowEqualOptions *options) +{ + const auto arrow_array = garrow_array_get_raw(array); + const auto arrow_other_array = garrow_array_get_raw(other_array); + if (options) { + auto is_approx = garrow_equal_options_is_approx(options); + const auto arrow_options = garrow_equal_options_get_raw(options); + if (is_approx) { + return arrow_array->ApproxEquals(arrow_other_array, *arrow_options); + } else { + return arrow_array->Equals(arrow_other_array, *arrow_options); + } + } else { + return arrow_array->Equals(arrow_other_array); + } +} + +/** + * garrow_array_equal_approx: + * @array: A #GArrowArray. + * @other_array: A #GArrowArray to be compared. + * + * Returns: %TRUE if both of them have the approx same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array) +{ + const auto arrow_array = garrow_array_get_raw(array); + const auto arrow_other_array = garrow_array_get_raw(other_array); + return arrow_array->ApproxEquals(arrow_other_array); +} + +/** + * garrow_array_equal_range: + * @array: A #GArrowArray. + * @start_index: The start index of @array to be used. + * @other_array: A #GArrowArray to be compared. + * @other_start_index: The start index of @other_array to be used. + * @end_index: The end index of @array to be used. The end index of + * @other_array is "@other_start_index + (@end_index - + * @start_index)". + * @options: (nullable): A #GArrowEqualOptions to custom how to compare. + * + * Returns: %TRUE if both of them have the same data in the range, + * %FALSE otherwise. 
+ * + * Since: 0.4.0 + */ +gboolean +garrow_array_equal_range(GArrowArray *array, + gint64 start_index, + GArrowArray *other_array, + gint64 other_start_index, + gint64 end_index, + GArrowEqualOptions *options) +{ + const auto arrow_array = garrow_array_get_raw(array); + const auto arrow_other_array = garrow_array_get_raw(other_array); + if (options) { + const auto arrow_options = garrow_equal_options_get_raw(options); + return arrow_array->RangeEquals(arrow_other_array, + start_index, + end_index, + other_start_index, + *arrow_options); + } else { + return arrow_array->RangeEquals(arrow_other_array, + start_index, + end_index, + other_start_index); + } +} + +/** + * garrow_array_is_null: + * @array: A #GArrowArray. + * @i: The index of the target value. + * + * Returns: Whether the @i-th value is null or not. + * + * Since: 0.3.0 + */ +gboolean +garrow_array_is_null(GArrowArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + return arrow_array->IsNull(i); +} + +/** + * garrow_array_is_valid: + * @array: A #GArrowArray. + * @i: The index of the target value. + * + * Returns: Whether the @i-th value is valid (not null) or not. + * + * Since: 0.8.0 + */ +gboolean +garrow_array_is_valid(GArrowArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + return arrow_array->IsValid(i); +} + +/** + * garrow_array_get_length: + * @array: A #GArrowArray. + * + * Returns: The number of rows in the array. + */ +gint64 +garrow_array_get_length(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + return arrow_array->length(); +} + +/** + * garrow_array_get_offset: + * @array: A #GArrowArray. + * + * Returns: The number of values in the array. + */ +gint64 +garrow_array_get_offset(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + return arrow_array->offset(); +} + +/** + * garrow_array_get_n_nulls: + * @array: A #GArrowArray. + * + * Returns: The number of NULLs in the array. 
+ */ +gint64 +garrow_array_get_n_nulls(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + return arrow_array->null_count(); +} + +/** + * garrow_array_get_null_bitmap: + * @array: A #GArrowArray. + * + * Returns: (transfer full) (nullable): The bitmap that indicates null + * value indices for the array as #GArrowBuffer or %NULL when + * garrow_array_get_n_nulls() returns 0. + * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_array_get_null_bitmap(GArrowArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + if (priv->null_bitmap) { + g_object_ref(priv->null_bitmap); + return priv->null_bitmap; + } + + auto arrow_array = garrow_array_get_raw(array); + auto arrow_null_bitmap = arrow_array->null_bitmap(); + return garrow_buffer_new_raw(&arrow_null_bitmap); +} + +/** + * garrow_array_get_value_data_type: + * @array: A #GArrowArray. + * + * Returns: (transfer full): The #GArrowDataType for each value of the + * array. + * + * Since: 0.3.0 + */ +GArrowDataType * +garrow_array_get_value_data_type(GArrowArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + if (priv->value_data_type) { + g_object_ref(priv->value_data_type); + return priv->value_data_type; + } + + auto arrow_array = garrow_array_get_raw(array); + auto arrow_data_type = arrow_array->type(); + return garrow_data_type_new_raw(&arrow_data_type); +} + +/** + * garrow_array_get_value_type: + * @array: A #GArrowArray. + * + * Returns: The #GArrowType for each value of the array. + * + * Since: 0.3.0 + */ +GArrowType +garrow_array_get_value_type(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + return garrow_type_from_raw(arrow_array->type_id()); +} + +/** + * garrow_array_slice: + * @array: A #GArrowArray. + * @offset: The offset of sub #GArrowArray. + * @length: The length of sub #GArrowArray. + * + * Returns: (transfer full): The sub #GArrowArray. It covers only from + * `offset` to `offset + length` range. 
The sub #GArrowArray shares + * values with the base #GArrowArray. + */ +GArrowArray * +garrow_array_slice(GArrowArray *array, + gint64 offset, + gint64 length) +{ + const auto arrow_array = garrow_array_get_raw(array); + auto arrow_sub_array = arrow_array->Slice(offset, length); + return garrow_array_new_raw(&arrow_sub_array, + "array", &arrow_sub_array, + "parent", array, + NULL); +} + +/** + * garrow_array_to_string: + * @array: A #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * The formatted array content or %NULL on error. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.4.0 + */ +gchar * +garrow_array_to_string(GArrowArray *array, GError **error) +{ + const auto arrow_array = garrow_array_get_raw(array); + return g_strdup(arrow_array->ToString().c_str()); +} + +/** + * garrow_array_view: + * @array: A #GArrowArray. + * @return_type: A #GArrowDataType of the returned view. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A zero-copy view of this array + * with the given type. This method checks if the `return_type` are + * layout-compatible. + * + * Since: 0.15.0 + */ +GArrowArray * +garrow_array_view(GArrowArray *array, + GArrowDataType *return_type, + GError **error) +{ + auto arrow_array_raw = garrow_array_get_raw(array); + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto arrow_array = arrow_array_raw->View(arrow_return_type); + if (garrow::check(error, arrow_array, "[array][view]")) { + return garrow_array_new_raw(&(*arrow_array)); + } else { + return NULL; + } +} + +/** + * garrow_array_diff_unified: + * @array: A #GArrowArray. + * @other_array: A #GArrowArray to be compared. + * + * Returns: (nullable) (transfer full): The string representation of + * the difference between two arrays as unified format. If there is + * no difference, the return value is %NULL. 
+ * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.15.0 + */ +gchar * +garrow_array_diff_unified(GArrowArray *array, GArrowArray *other_array) +{ + const auto arrow_array = garrow_array_get_raw(array); + const auto arrow_other_array = garrow_array_get_raw(other_array); + std::stringstream diff; + arrow_array->Equals(arrow_other_array, + arrow::EqualOptions().diff_sink(&diff)); + auto string = diff.str(); + if (string.empty()) { + return NULL; + } else { + return g_strndup(string.data(), string.size()); + } +} + +/** + * garrow_array_concatenate: + * @array: A #GArrowArray. + * @other_arrays: (element-type GArrowArray): A #GArrowArray to be + * concatenated. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The concatenated array. + * + * Since: 4.0.0 + */ +GArrowArray * +garrow_array_concatenate(GArrowArray *array, + GList *other_arrays, + GError **error) +{ + if (!other_arrays) { + g_object_ref(array); + return array; + } + arrow::ArrayVector arrow_arrays; + arrow_arrays.push_back(garrow_array_get_raw(array)); + for (auto node = other_arrays; node; node = node->next) { + auto other_array = GARROW_ARRAY(node->data); + arrow_arrays.push_back(garrow_array_get_raw(other_array)); + } + auto arrow_concatenated_array = arrow::Concatenate(arrow_arrays); + if (garrow::check(error, + arrow_concatenated_array, + "[array][concatenate]")) { + return garrow_array_new_raw(&(*arrow_concatenated_array)); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowNullArray, + garrow_null_array, + GARROW_TYPE_ARRAY) + +static void +garrow_null_array_init(GArrowNullArray *object) +{ +} + +static void +garrow_null_array_class_init(GArrowNullArrayClass *klass) +{ +} + +/** + * garrow_null_array_new: + * @length: An array length. + * + * Returns: A newly created #GArrowNullArray. 
+ */ +GArrowNullArray * +garrow_null_array_new(gint64 length) +{ + auto arrow_null_array = std::make_shared<arrow::NullArray>(length); + std::shared_ptr<arrow::Array> arrow_array = arrow_null_array; + auto array = garrow_array_new_raw(&arrow_array); + return GARROW_NULL_ARRAY(array); +} + + +G_DEFINE_TYPE(GArrowPrimitiveArray, + garrow_primitive_array, + GARROW_TYPE_ARRAY) + +G_END_DECLS +template <typename T> +GArrowArray * +garrow_primitive_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + const auto arrow_data = garrow_buffer_get_raw(data); + const auto arrow_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_specific_array = + std::make_shared<typename arrow::TypeTraits<T>::ArrayType>(length, + arrow_data, + arrow_bitmap, + n_nulls); + auto arrow_array = + std::static_pointer_cast<arrow::Array>(arrow_specific_array); + return garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "null-bitmap", null_bitmap, + "buffer1", data, + NULL); +}; + +template <typename T> +GArrowArray * +garrow_primitive_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + const auto arrow_data = garrow_buffer_get_raw(data); + const auto arrow_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_specific_array = + std::make_shared<typename arrow::TypeTraits<T>::ArrayType>(arrow_data_type, + length, + arrow_data, + arrow_bitmap, + n_nulls); + auto arrow_array = + std::static_pointer_cast<arrow::Array>(arrow_specific_array); + return garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "null-bitmap", null_bitmap, + "buffer1", data, + NULL); +}; +G_BEGIN_DECLS + +static void +garrow_primitive_array_init(GArrowPrimitiveArray *object) +{ +} + +static void +garrow_primitive_array_class_init(GArrowPrimitiveArrayClass *klass) +{ +} + +/** + * garrow_primitive_array_get_buffer: + * 
@array: A #GArrowPrimitiveArray. + * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Deprecated: 1.0.0: Use garrow_primitive_array_get_data_buffer() instead. + */ +GArrowBuffer * +garrow_primitive_array_get_buffer(GArrowPrimitiveArray *array) +{ + return garrow_primitive_array_get_data_buffer(array); +} + +/** + * garrow_primitive_array_get_data_buffer: + * @array: A #GArrowPrimitiveArray. + * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Since: 1.0.0 + */ +GArrowBuffer * +garrow_primitive_array_get_data_buffer(GArrowPrimitiveArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + if (priv->buffer1) { + g_object_ref(priv->buffer1); + return priv->buffer1; + } + + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_primitive_array = + std::static_pointer_cast<arrow::PrimitiveArray>(arrow_array); + auto arrow_data = arrow_primitive_array->values(); + return garrow_buffer_new_raw(&arrow_data); +} + + +G_DEFINE_TYPE(GArrowBooleanArray, + garrow_boolean_array, + GARROW_TYPE_PRIMITIVE_ARRAY) + +static void +garrow_boolean_array_init(GArrowBooleanArray *object) +{ +} + +static void +garrow_boolean_array_class_init(GArrowBooleanArrayClass *klass) +{ +} + +/** + * garrow_boolean_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowBooleanArray. 
+ * + * Since: 0.4.0 + */ +GArrowBooleanArray * +garrow_boolean_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::BooleanType>(length, + data, + null_bitmap, + n_nulls); + return GARROW_BOOLEAN_ARRAY(array); +} + +/** + * garrow_boolean_array_get_value: + * @array: A #GArrowBooleanArray. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gboolean +garrow_boolean_array_get_value(GArrowBooleanArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::BooleanArray *>(arrow_array.get())->Value(i); +} + +/** + * garrow_boolean_array_get_values: + * @array: A #GArrowBooleanArray. + * @length: (out): The number of values. + * + * Returns: (array length=length) (transfer full): + * The raw boolean values. + * + * It should be freed with g_free() when no longer needed. + */ +gboolean * +garrow_boolean_array_get_values(GArrowBooleanArray *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_boolean_array = + std::static_pointer_cast<arrow::BooleanArray>(arrow_array); + *length = arrow_boolean_array->length(); + auto values = static_cast<gboolean *>(g_new(gboolean, *length)); + for (gint64 i = 0; i < *length; ++i) { + values[i] = arrow_boolean_array->Value(i); + } + return values; +} + + +G_DEFINE_TYPE(GArrowNumericArray, + garrow_numeric_array, + GARROW_TYPE_PRIMITIVE_ARRAY) + +static void +garrow_numeric_array_init(GArrowNumericArray *object) +{ +} + +static void +garrow_numeric_array_class_init(GArrowNumericArrayClass *klass) +{ +} + + +G_DEFINE_TYPE(GArrowInt8Array, + garrow_int8_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_int8_array_init(GArrowInt8Array *object) +{ +} + +static void +garrow_int8_array_class_init(GArrowInt8ArrayClass *klass) +{ +} + +/** + * garrow_int8_array_new: + * @length: The number of elements. 
+ * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowInt8Array. + * + * Since: 0.4.0 + */ +GArrowInt8Array * +garrow_int8_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::Int8Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_INT8_ARRAY(array); +} + +/** + * garrow_int8_array_get_value: + * @array: A #GArrowInt8Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gint8 +garrow_int8_array_get_value(GArrowInt8Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Int8Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_int8_array_get_values: + * @array: A #GArrowInt8Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gint8 * +garrow_int8_array_get_values(GArrowInt8Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::Int8Type>(arrow_array, length); +} + +G_DEFINE_TYPE(GArrowUInt8Array, + garrow_uint8_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_uint8_array_init(GArrowUInt8Array *object) +{ +} + +static void +garrow_uint8_array_class_init(GArrowUInt8ArrayClass *klass) +{ +} + +/** + * garrow_uint8_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. 
The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowUInt8Array. + * + * Since: 0.4.0 + */ +GArrowUInt8Array * +garrow_uint8_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::UInt8Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_UINT8_ARRAY(array); +} + +/** + * garrow_uint8_array_get_value: + * @array: A #GArrowUInt8Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +guint8 +garrow_uint8_array_get_value(GArrowUInt8Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::UInt8Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_uint8_array_get_values: + * @array: A #GArrowUInt8Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const guint8 * +garrow_uint8_array_get_values(GArrowUInt8Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::UInt8Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowInt16Array, + garrow_int16_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_int16_array_init(GArrowInt16Array *object) +{ +} + +static void +garrow_int16_array_class_init(GArrowInt16ArrayClass *klass) +{ +} + +/** + * garrow_int16_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. 
+ * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowInt16Array. + * + * Since: 0.4.0 + */ +GArrowInt16Array * +garrow_int16_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::Int16Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_INT16_ARRAY(array); +} + +/** + * garrow_int16_array_get_value: + * @array: A #GArrowInt16Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gint16 +garrow_int16_array_get_value(GArrowInt16Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Int16Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_int16_array_get_values: + * @array: A #GArrowInt16Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gint16 * +garrow_int16_array_get_values(GArrowInt16Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::Int16Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowUInt16Array, + garrow_uint16_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_uint16_array_init(GArrowUInt16Array *object) +{ +} + +static void +garrow_uint16_array_class_init(GArrowUInt16ArrayClass *klass) +{ +} + +/** + * garrow_uint16_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. 
If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowUInt16Array. + * + * Since: 0.4.0 + */ +GArrowUInt16Array * +garrow_uint16_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::UInt16Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_UINT16_ARRAY(array); +} + +/** + * garrow_uint16_array_get_value: + * @array: A #GArrowUInt16Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +guint16 +garrow_uint16_array_get_value(GArrowUInt16Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::UInt16Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_uint16_array_get_values: + * @array: A #GArrowUInt16Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const guint16 * +garrow_uint16_array_get_values(GArrowUInt16Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::UInt16Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowInt32Array, + garrow_int32_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_int32_array_init(GArrowInt32Array *object) +{ +} + +static void +garrow_int32_array_class_init(GArrowInt32ArrayClass *klass) +{ +} + +/** + * garrow_int32_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowInt32Array. 
+ * + * Since: 0.4.0 + */ +GArrowInt32Array * +garrow_int32_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::Int32Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_INT32_ARRAY(array); +} + +/** + * garrow_int32_array_get_value: + * @array: A #GArrowInt32Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gint32 +garrow_int32_array_get_value(GArrowInt32Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Int32Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_int32_array_get_values: + * @array: A #GArrowInt32Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gint32 * +garrow_int32_array_get_values(GArrowInt32Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::Int32Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowUInt32Array, + garrow_uint32_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_uint32_array_init(GArrowUInt32Array *object) +{ +} + +static void +garrow_uint32_array_class_init(GArrowUInt32ArrayClass *klass) +{ +} + +/** + * garrow_uint32_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowUInt32Array. 
+ * + * Since: 0.4.0 + */ +GArrowUInt32Array * +garrow_uint32_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::UInt32Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_UINT32_ARRAY(array); +} + +/** + * garrow_uint32_array_get_value: + * @array: A #GArrowUInt32Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +guint32 +garrow_uint32_array_get_value(GArrowUInt32Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::UInt32Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_uint32_array_get_values: + * @array: A #GArrowUInt32Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const guint32 * +garrow_uint32_array_get_values(GArrowUInt32Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::UInt32Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowInt64Array, + garrow_int64_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_int64_array_init(GArrowInt64Array *object) +{ +} + +static void +garrow_int64_array_class_init(GArrowInt64ArrayClass *klass) +{ +} + +/** + * garrow_int64_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowInt64Array. 
+ * + * Since: 0.4.0 + */ +GArrowInt64Array * +garrow_int64_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::Int64Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_INT64_ARRAY(array); +} + +/** + * garrow_int64_array_get_value: + * @array: A #GArrowInt64Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gint64 +garrow_int64_array_get_value(GArrowInt64Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Int64Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_int64_array_get_values: + * @array: A #GArrowInt64Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gint64 * +garrow_int64_array_get_values(GArrowInt64Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto values = + garrow_array_get_values_raw<arrow::Int64Type>(arrow_array, length); + return reinterpret_cast<const gint64 *>(values); +} + + +G_DEFINE_TYPE(GArrowUInt64Array, + garrow_uint64_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_uint64_array_init(GArrowUInt64Array *object) +{ +} + +static void +garrow_uint64_array_class_init(GArrowUInt64ArrayClass *klass) +{ +} + +/** + * garrow_uint64_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowUInt64Array. 
+ * + * Since: 0.4.0 + */ +GArrowUInt64Array * +garrow_uint64_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::UInt64Type>(length, + data, + null_bitmap, + n_nulls); + return GARROW_UINT64_ARRAY(array); +} + +/** + * garrow_uint64_array_get_value: + * @array: A #GArrowUInt64Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +guint64 +garrow_uint64_array_get_value(GArrowUInt64Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::UInt64Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_uint64_array_get_values: + * @array: A #GArrowUInt64Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const guint64 * +garrow_uint64_array_get_values(GArrowUInt64Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto values = + garrow_array_get_values_raw<arrow::UInt64Type>(arrow_array, length); + return reinterpret_cast<const guint64 *>(values); +} + + +G_DEFINE_TYPE(GArrowFloatArray, + garrow_float_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_float_array_init(GArrowFloatArray *object) +{ +} + +static void +garrow_float_array_class_init(GArrowFloatArrayClass *klass) +{ +} + +/** + * garrow_float_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowFloatArray. 
+ * + * Since: 0.4.0 + */ +GArrowFloatArray * +garrow_float_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::FloatType>(length, + data, + null_bitmap, + n_nulls); + return GARROW_FLOAT_ARRAY(array); +} + +/** + * garrow_float_array_get_value: + * @array: A #GArrowFloatArray. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gfloat +garrow_float_array_get_value(GArrowFloatArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::FloatArray *>(arrow_array.get())->Value(i); +} + +/** + * garrow_float_array_get_values: + * @array: A #GArrowFloatArray. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gfloat * +garrow_float_array_get_values(GArrowFloatArray *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::FloatType>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowDoubleArray, + garrow_double_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_double_array_init(GArrowDoubleArray *object) +{ +} + +static void +garrow_double_array_class_init(GArrowDoubleArrayClass *klass) +{ +} + +/** + * garrow_double_array_new: + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowDoubleArray. 
+ * + * Since: 0.4.0 + */ +GArrowDoubleArray * +garrow_double_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = garrow_primitive_array_new<arrow::DoubleType>(length, + data, + null_bitmap, + n_nulls); + return GARROW_DOUBLE_ARRAY(array); +} + +/** + * garrow_double_array_get_value: + * @array: A #GArrowDoubleArray. + * @i: The index of the target value. + * + * Returns: The @i-th value. + */ +gdouble +garrow_double_array_get_value(GArrowDoubleArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::DoubleArray *>(arrow_array.get())->Value(i); +} + +/** + * garrow_double_array_get_values: + * @array: A #GArrowDoubleArray. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + */ +const gdouble * +garrow_double_array_get_values(GArrowDoubleArray *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::DoubleType>(arrow_array, length); +} + + +G_END_DECLS +template <typename BINARY_ARRAY_CLASS> +GArrowArray * +garrow_base_binary_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + const auto arrow_value_offsets = garrow_buffer_get_raw(value_offsets); + const auto arrow_value_data = garrow_buffer_get_raw(value_data); + const auto arrow_null_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_binary_array = + std::make_shared<BINARY_ARRAY_CLASS>(length, + arrow_value_offsets, + arrow_value_data, + arrow_null_bitmap, + n_nulls); + auto arrow_array = + std::static_pointer_cast<arrow::Array>(arrow_binary_array); + return garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "null-bitmap", null_bitmap, + "buffer1", value_offsets, + "buffer2", value_data, + NULL); +}; + +template <typename BINARY_ARRAY_CLASS> +GBytes * 
+garrow_base_binary_array_get_value(GArrowArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_binary_array = + std::static_pointer_cast<BINARY_ARRAY_CLASS>(arrow_array); + auto view = arrow_binary_array->GetView(i); + return g_bytes_new_static(view.data(), view.length()); +}; + +template <typename BINARY_ARRAY_CLASS> +GArrowBuffer * +garrow_base_binary_array_get_data_buffer(GArrowArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + if (priv->buffer2) { + g_object_ref(priv->buffer2); + return priv->buffer2; + } + + auto arrow_array = garrow_array_get_raw(array); + auto arrow_binary_array = + std::static_pointer_cast<BINARY_ARRAY_CLASS>(arrow_array); + auto arrow_data = arrow_binary_array->value_data(); + return garrow_buffer_new_raw(&arrow_data); +}; + +template <typename BINARY_ARRAY_CLASS> +GArrowBuffer * +garrow_base_binary_array_get_offsets_buffer(GArrowArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + if (priv->buffer1) { + g_object_ref(priv->buffer1); + return priv->buffer1; + } + + auto arrow_array = garrow_array_get_raw(array); + auto arrow_binary_array = + std::static_pointer_cast<BINARY_ARRAY_CLASS>(arrow_array); + auto arrow_offsets = arrow_binary_array->value_offsets(); + return garrow_buffer_new_raw(&arrow_offsets); +}; +G_BEGIN_DECLS + +G_DEFINE_TYPE(GArrowBinaryArray, + garrow_binary_array, + GARROW_TYPE_ARRAY) + +static void +garrow_binary_array_init(GArrowBinaryArray *object) +{ +} + +static void +garrow_binary_array_class_init(GArrowBinaryArrayClass *klass) +{ +} + +/** + * garrow_binary_array_new: + * @length: The number of elements. + * @value_offsets: The value offsets of @data in Arrow format. + * @value_data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. 
+ * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowBinaryArray. + * + * Since: 0.4.0 + */ +GArrowBinaryArray * +garrow_binary_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto binary_array = + garrow_base_binary_array_new<arrow::BinaryArray>(length, + value_offsets, + value_data, + null_bitmap, + n_nulls); + return GARROW_BINARY_ARRAY(binary_array); +} + +/** + * garrow_binary_array_get_value: + * @array: A #GArrowBinaryArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. + */ +GBytes * +garrow_binary_array_get_value(GArrowBinaryArray *array, + gint64 i) +{ + return garrow_base_binary_array_get_value<arrow::BinaryArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_binary_array_get_buffer: + * @array: A #GArrowBinaryArray. + * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Deprecated: 1.0.0: Use garrow_binary_array_get_data_buffer() instead. + */ +GArrowBuffer * +garrow_binary_array_get_buffer(GArrowBinaryArray *array) +{ + return garrow_binary_array_get_data_buffer(array); +} + +/** + * garrow_binary_array_get_data_buffer: + * @array: A #GArrowBinaryArray. + * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Since: 1.0.0 + */ +GArrowBuffer * +garrow_binary_array_get_data_buffer(GArrowBinaryArray *array) +{ + return garrow_base_binary_array_get_data_buffer<arrow::BinaryArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_binary_array_get_offsets_buffer: + * @array: A #GArrowBinaryArray. + * + * Returns: (transfer full): The offsets of the array as #GArrowBuffer. 
+ */ +GArrowBuffer * +garrow_binary_array_get_offsets_buffer(GArrowBinaryArray *array) +{ + return garrow_base_binary_array_get_offsets_buffer<arrow::BinaryArray>( + GARROW_ARRAY(array)); +} + + +G_DEFINE_TYPE(GArrowLargeBinaryArray, + garrow_large_binary_array, + GARROW_TYPE_ARRAY) + +static void +garrow_large_binary_array_init(GArrowLargeBinaryArray *object) +{ +} + +static void +garrow_large_binary_array_class_init(GArrowLargeBinaryArrayClass *klass) +{ +} + +/** + * garrow_large_binary_array_new: + * @length: The number of elements. + * @value_offsets: The value offsets of @data in Arrow format. + * @value_data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowLargeBinaryArray. + * + * Since: 0.16.0 + */ +GArrowLargeBinaryArray * +garrow_large_binary_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto large_binary_array = + garrow_base_binary_array_new<arrow::LargeBinaryArray>(length, + value_offsets, + value_data, + null_bitmap, + n_nulls); + return GARROW_LARGE_BINARY_ARRAY(large_binary_array); +} + +/** + * garrow_large_binary_array_get_value: + * @array: A #GArrowLargeBinaryArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. + * + * Since: 0.16.0 + */ +GBytes * +garrow_large_binary_array_get_value(GArrowLargeBinaryArray *array, + gint64 i) +{ + return garrow_base_binary_array_get_value<arrow::LargeBinaryArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_large_binary_array_get_buffer: + * @array: A #GArrowLargeBinaryArray. 
+ * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Since: 0.16.0 + * + * Deprecated: 1.0.0: Use garrow_large_binary_array_get_data_buffer() instead. + */ +GArrowBuffer * +garrow_large_binary_array_get_buffer(GArrowLargeBinaryArray *array) +{ + return garrow_large_binary_array_get_data_buffer(array); +} + +/** + * garrow_large_binary_array_get_data_buffer: + * @array: A #GArrowLargeBinaryArray. + * + * Returns: (transfer full): The data of the array as #GArrowBuffer. + * + * Since: 1.0.0 + */ +GArrowBuffer * +garrow_large_binary_array_get_data_buffer(GArrowLargeBinaryArray *array) +{ + return garrow_base_binary_array_get_data_buffer<arrow::LargeBinaryArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_large_binary_array_get_offsets_buffer: + * @array: A #GArrowLargeBinaryArray. + * + * Returns: (transfer full): The offsets of the array as #GArrowBuffer. + * + * Since: 0.16.0 + */ +GArrowBuffer * +garrow_large_binary_array_get_offsets_buffer(GArrowLargeBinaryArray *array) +{ + return garrow_base_binary_array_get_offsets_buffer<arrow::LargeBinaryArray>( + GARROW_ARRAY(array)); +} + + +G_END_DECLS +template <typename STRING_ARRAY_CLASS> +gchar * +garrow_base_string_array_get_value(GArrowArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_string_array = + std::static_pointer_cast<STRING_ARRAY_CLASS>(arrow_array); + auto view = arrow_string_array->GetView(i); + return g_strndup(view.data(), view.length()); +}; +G_BEGIN_DECLS + +G_DEFINE_TYPE(GArrowStringArray, + garrow_string_array, + GARROW_TYPE_BINARY_ARRAY) + +static void +garrow_string_array_init(GArrowStringArray *object) +{ +} + +static void +garrow_string_array_class_init(GArrowStringArrayClass *klass) +{ +} + +/** + * garrow_string_array_new: + * @length: The number of elements. + * @value_offsets: The value offsets of @data in Arrow format. + * @value_data: The binary data in Arrow format of the array. 
+ * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowStringArray. + * + * Since: 0.4.0 + */ +GArrowStringArray * +garrow_string_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto string_array = + garrow_base_binary_array_new<arrow::StringArray>(length, + value_offsets, + value_data, + null_bitmap, + n_nulls); + return GARROW_STRING_ARRAY(string_array); +} + +/** + * garrow_string_array_get_string: + * @array: A #GArrowStringArray. + * @i: The index of the target value. + * + * Returns: The @i-th UTF-8 encoded string. + */ +gchar * +garrow_string_array_get_string(GArrowStringArray *array, + gint64 i) +{ + return garrow_base_string_array_get_value<arrow::StringArray>( + GARROW_ARRAY(array), i); +} + + +G_DEFINE_TYPE(GArrowLargeStringArray, + garrow_large_string_array, + GARROW_TYPE_LARGE_BINARY_ARRAY) + +static void +garrow_large_string_array_init(GArrowLargeStringArray *object) +{ +} + +static void +garrow_large_string_array_class_init(GArrowLargeStringArrayClass *klass) +{ +} + +/** + * garrow_large_string_array_new: + * @length: The number of elements. + * @value_offsets: The value offsets of @data in Arrow format. + * @value_data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. 
+ *
+ * Returns: A newly created #GArrowLargeStringArray.
+ *
+ * Since: 0.16.0
+ */
+GArrowLargeStringArray *
+garrow_large_string_array_new(gint64 length,
+                              GArrowBuffer *value_offsets,
+                              GArrowBuffer *value_data,
+                              GArrowBuffer *null_bitmap,
+                              gint64 n_nulls)
+{
+  return GARROW_LARGE_STRING_ARRAY(
+    garrow_base_binary_array_new<arrow::LargeStringArray>(length,
+                                                          value_offsets,
+                                                          value_data,
+                                                          null_bitmap,
+                                                          n_nulls));
+}
+
+/**
+ * garrow_large_string_array_get_string:
+ * @array: A #GArrowLargeStringArray.
+ * @i: The index of the value to fetch.
+ *
+ * Returns: The @i-th UTF-8 encoded string.
+ *
+ * Since: 0.16.0
+ */
+gchar *
+garrow_large_string_array_get_string(GArrowLargeStringArray *array,
+                                     gint64 i)
+{
+  auto base_array = GARROW_ARRAY(array);
+  return garrow_base_string_array_get_value<arrow::LargeStringArray>(base_array, i);
+}
+
+
+G_DEFINE_TYPE(GArrowDate32Array,
+              garrow_date32_array,
+              GARROW_TYPE_NUMERIC_ARRAY)
+
+static void
+garrow_date32_array_init(GArrowDate32Array *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_date32_array_class_init(GArrowDate32ArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_date32_array_new:
+ * @length: The number of elements.
+ * @data: The binary data in Arrow format of the array.
+ * @null_bitmap: (nullable): The bitmap that shows null elements. The
+ *   N-th element is null when the N-th bit is 0, not null otherwise.
+ *   If the array has no null elements, the bitmap must be %NULL and
+ *   @n_nulls is 0.
+ * @n_nulls: The number of null elements. If -1 is specified, the
+ *   number of nulls are computed from @null_bitmap.
+ *
+ * Returns: A newly created #GArrowDate32Array.
+ *
+ * Since: 0.7.0
+ */
+GArrowDate32Array *
+garrow_date32_array_new(gint64 length,
+                        GArrowBuffer *data,
+                        GArrowBuffer *null_bitmap,
+                        gint64 n_nulls)
+{
+  return GARROW_DATE32_ARRAY(
+    garrow_primitive_array_new<arrow::Date32Type>(length,
+                                                  data,
+                                                  null_bitmap,
+                                                  n_nulls));
+}
+
+/**
+ * garrow_date32_array_get_value:
+ * @array: A #GArrowDate32Array.
+ * @i: The index of the value to fetch.
+ *
+ * Returns: The @i-th value.
+ *
+ * Since: 0.7.0
+ */
+gint32
+garrow_date32_array_get_value(GArrowDate32Array *array,
+                              gint64 i)
+{
+  auto arrow_date32_array =
+    std::static_pointer_cast<arrow::Date32Array>(
+      garrow_array_get_raw(GARROW_ARRAY(array)));
+  return arrow_date32_array->Value(i);
+}
+
+/**
+ * garrow_date32_array_get_values:
+ * @array: A #GArrowDate32Array.
+ * @length: (out): The number of values.
+ *
+ * Returns: (array length=length): The raw values.
+ *
+ * Since: 0.7.0
+ */
+const gint32 *
+garrow_date32_array_get_values(GArrowDate32Array *array,
+                               gint64 *length)
+{
+  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto raw_values =
+    garrow_array_get_values_raw<arrow::Date32Type>(raw_array, length);
+  return raw_values;
+}
+
+
+G_DEFINE_TYPE(GArrowDate64Array,
+              garrow_date64_array,
+              GARROW_TYPE_NUMERIC_ARRAY)
+
+static void
+garrow_date64_array_init(GArrowDate64Array *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_date64_array_class_init(GArrowDate64ArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_date64_array_new:
+ * @length: The number of elements.
+ * @data: The binary data in Arrow format of the array.
+ * @null_bitmap: (nullable): The bitmap that shows null elements. The
+ *   N-th element is null when the N-th bit is 0, not null otherwise.
+ *   If the array has no null elements, the bitmap must be %NULL and
+ *   @n_nulls is 0.
+ * @n_nulls: The number of null elements. If -1 is specified, the
+ *   number of nulls are computed from @null_bitmap.
+ *
+ * Returns: A newly created #GArrowDate64Array.
+ *
+ * Since: 0.7.0
+ */
+GArrowDate64Array *
+garrow_date64_array_new(gint64 length,
+                        GArrowBuffer *data,
+                        GArrowBuffer *null_bitmap,
+                        gint64 n_nulls)
+{
+  return GARROW_DATE64_ARRAY(
+    garrow_primitive_array_new<arrow::Date64Type>(length,
+                                                  data,
+                                                  null_bitmap,
+                                                  n_nulls));
+}
+
+/**
+ * garrow_date64_array_get_value:
+ * @array: A #GArrowDate64Array.
+ * @i: The index of the value to fetch.
+ *
+ * Returns: The @i-th value.
+ *
+ * Since: 0.7.0
+ */
+gint64
+garrow_date64_array_get_value(GArrowDate64Array *array,
+                              gint64 i)
+{
+  auto arrow_date64_array =
+    std::static_pointer_cast<arrow::Date64Array>(
+      garrow_array_get_raw(GARROW_ARRAY(array)));
+  return arrow_date64_array->Value(i);
+}
+
+/**
+ * garrow_date64_array_get_values:
+ * @array: A #GArrowDate64Array.
+ * @length: (out): The number of values.
+ *
+ * Returns: (array length=length): The raw values.
+ *
+ * Since: 0.7.0
+ */
+const gint64 *
+garrow_date64_array_get_values(GArrowDate64Array *array,
+                               gint64 *length)
+{
+  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto raw_values =
+    garrow_array_get_values_raw<arrow::Date64Type>(raw_array, length);
+  /* Expose the Arrow C type through the GLib integer typedef. */
+  return reinterpret_cast<const gint64 *>(raw_values);
+}
+
+
+G_DEFINE_TYPE(GArrowTimestampArray,
+              garrow_timestamp_array,
+              GARROW_TYPE_NUMERIC_ARRAY)
+
+static void
+garrow_timestamp_array_init(GArrowTimestampArray *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_timestamp_array_class_init(GArrowTimestampArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_timestamp_array_new:
+ * @data_type: The #GArrowTimestampDataType.
+ * @length: The number of elements.
+ * @data: The binary data in Arrow format of the array.
+ * @null_bitmap: (nullable): The bitmap that shows null elements. The
+ *   N-th element is null when the N-th bit is 0, not null otherwise.
+ *   If the array has no null elements, the bitmap must be %NULL and
+ *   @n_nulls is 0.
+ * @n_nulls: The number of null elements. If -1 is specified, the
+ *   number of nulls are computed from @null_bitmap.
+ *
+ * Returns: A newly created #GArrowTimestampArray.
+ *
+ * Since: 0.7.0
+ */
+GArrowTimestampArray *
+garrow_timestamp_array_new(GArrowTimestampDataType *data_type,
+                           gint64 length,
+                           GArrowBuffer *data,
+                           GArrowBuffer *null_bitmap,
+                           gint64 n_nulls)
+{
+  /* The primitive helper takes the generic data type object. */
+  auto generic_data_type = GARROW_DATA_TYPE(data_type);
+  return GARROW_TIMESTAMP_ARRAY(
+    garrow_primitive_array_new<arrow::TimestampType>(generic_data_type,
+                                                     length,
+                                                     data,
+                                                     null_bitmap,
+                                                     n_nulls));
+}
+
+/**
+ * garrow_timestamp_array_get_value:
+ * @array: A #GArrowTimestampArray.
+ * @i: The index of the value to fetch.
+ *
+ * Returns: The @i-th value.
+ *
+ * Since: 0.7.0
+ */
+gint64
+garrow_timestamp_array_get_value(GArrowTimestampArray *array,
+                                 gint64 i)
+{
+  auto arrow_timestamp_array =
+    std::static_pointer_cast<arrow::TimestampArray>(
+      garrow_array_get_raw(GARROW_ARRAY(array)));
+  return arrow_timestamp_array->Value(i);
+}
+
+/**
+ * garrow_timestamp_array_get_values:
+ * @array: A #GArrowTimestampArray.
+ * @length: (out): The number of values.
+ *
+ * Returns: (array length=length): The raw values.
+ *
+ * Since: 0.7.0
+ */
+const gint64 *
+garrow_timestamp_array_get_values(GArrowTimestampArray *array,
+                                  gint64 *length)
+{
+  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto raw_values =
+    garrow_array_get_values_raw<arrow::TimestampType>(raw_array, length);
+  /* Expose the Arrow C type through the GLib integer typedef. */
+  return reinterpret_cast<const gint64 *>(raw_values);
+}
+
+
+G_DEFINE_TYPE(GArrowTime32Array,
+              garrow_time32_array,
+              GARROW_TYPE_NUMERIC_ARRAY)
+
+static void
+garrow_time32_array_init(GArrowTime32Array *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_time32_array_class_init(GArrowTime32ArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_time32_array_new:
+ * @data_type: The #GArrowTime32DataType.
+ * @length: The number of elements.
+ * @data: The binary data in Arrow format of the array.
+ * @null_bitmap: (nullable): The bitmap that shows null elements. The
+ *   N-th element is null when the N-th bit is 0, not null otherwise.
+ * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowTime32Array. + * + * Since: 0.7.0 + */ +GArrowTime32Array * +garrow_time32_array_new(GArrowTime32DataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = + garrow_primitive_array_new<arrow::Time32Type>(GARROW_DATA_TYPE(data_type), + length, + data, + null_bitmap, + n_nulls); + return GARROW_TIME32_ARRAY(array); +} + +/** + * garrow_time32_array_get_value: + * @array: A #GArrowTime32Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + * + * Since: 0.7.0 + */ +gint32 +garrow_time32_array_get_value(GArrowTime32Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Time32Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_time32_array_get_values: + * @array: A #GArrowTime32Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. + * + * Since: 0.7.0 + */ +const gint32 * +garrow_time32_array_get_values(GArrowTime32Array *array, + gint64 *length) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return garrow_array_get_values_raw<arrow::Time32Type>(arrow_array, length); +} + + +G_DEFINE_TYPE(GArrowTime64Array, + garrow_time64_array, + GARROW_TYPE_NUMERIC_ARRAY) + +static void +garrow_time64_array_init(GArrowTime64Array *object) +{ +} + +static void +garrow_time64_array_class_init(GArrowTime64ArrayClass *klass) +{ +} + +/** + * garrow_time64_array_new: + * @data_type: The #GArrowTime64DataType. + * @length: The number of elements. + * @data: The binary data in Arrow format of the array. + * @null_bitmap: (nullable): The bitmap that shows null elements. 
The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowTime64Array. + * + * Since: 0.7.0 + */ +GArrowTime64Array * +garrow_time64_array_new(GArrowTime64DataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto array = + garrow_primitive_array_new<arrow::Time64Type>(GARROW_DATA_TYPE(data_type), + length, + data, + null_bitmap, + n_nulls); + return GARROW_TIME64_ARRAY(array); +} + +/** + * garrow_time64_array_get_value: + * @array: A #GArrowTime64Array. + * @i: The index of the target value. + * + * Returns: The @i-th value. + * + * Since: 0.7.0 + */ +gint64 +garrow_time64_array_get_value(GArrowTime64Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + return static_cast<arrow::Time64Array *>(arrow_array.get())->Value(i); +} + +/** + * garrow_time64_array_get_values: + * @array: A #GArrowTime64Array. + * @length: (out): The number of values. + * + * Returns: (array length=length): The raw values. 
+ *
+ * Since: 0.7.0
+ */
+const gint64 *
+garrow_time64_array_get_values(GArrowTime64Array *array,
+                               gint64 *length)
+{
+  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto raw_values =
+    garrow_array_get_values_raw<arrow::Time64Type>(raw_array, length);
+  /* Expose the Arrow C type through the GLib integer typedef. */
+  return reinterpret_cast<const gint64 *>(raw_values);
+}
+
+
+G_DEFINE_TYPE(GArrowFixedSizeBinaryArray,
+              garrow_fixed_size_binary_array,
+              GARROW_TYPE_PRIMITIVE_ARRAY)
+static void
+garrow_fixed_size_binary_array_init(GArrowFixedSizeBinaryArray *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_fixed_size_binary_array_class_init(GArrowFixedSizeBinaryArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_fixed_size_binary_array_new:
+ * @data_type: A #GArrowFixedSizeBinaryDataType for the array.
+ * @length: The number of elements.
+ * @data: The binary data in Arrow format of the array.
+ * @null_bitmap: (nullable): The bitmap that marks null elements. The
+ *   N-th element is null when the N-th bit is 0, not null otherwise.
+ *   When the array has no null elements the bitmap must be %NULL and
+ *   @n_nulls must be 0.
+ * @n_nulls: The number of null elements. If -1 is specified, the
+ *   number of nulls is computed from @null_bitmap.
+ *
+ * Returns: A newly created #GArrowFixedSizeBinaryArray.
+ *
+ * Since: 3.0.0
+ */
+GArrowFixedSizeBinaryArray *
+garrow_fixed_size_binary_array_new(GArrowFixedSizeBinaryDataType *data_type,
+                                   gint64 length,
+                                   GArrowBuffer *data,
+                                   GArrowBuffer *null_bitmap,
+                                   gint64 n_nulls)
+{
+  /* The primitive helper takes the generic data type object. */
+  auto generic_data_type = GARROW_DATA_TYPE(data_type);
+  return GARROW_FIXED_SIZE_BINARY_ARRAY(
+    garrow_primitive_array_new<arrow::FixedSizeBinaryType>(generic_data_type,
+                                                           length,
+                                                           data,
+                                                           null_bitmap,
+                                                           n_nulls));
+}
+
+/**
+ * garrow_fixed_size_binary_array_get_byte_width:
+ * @array: A #GArrowFixedSizeBinaryArray.
+ *
+ * Returns: The number of bytes of each value.
+ *
+ * Since: 3.0.0
+ */
+gint32
+garrow_fixed_size_binary_array_get_byte_width(GArrowFixedSizeBinaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_binary_array =
+    std::static_pointer_cast<arrow::FixedSizeBinaryArray>(arrow_array);
+  return arrow_binary_array->byte_width();
+}
+
+/**
+ * garrow_fixed_size_binary_array_get_value:
+ * @array: A #GArrowFixedSizeBinaryArray.
+ * @i: The index of the target value.
+ *
+ * The returned #GBytes does not copy the value. It points into the
+ * memory of @array and holds a reference on @array until the #GBytes
+ * is freed.
+ *
+ * Returns: (transfer full): The @i-th value.
+ *
+ * Since: 3.0.0
+ */
+GBytes *
+garrow_fixed_size_binary_array_get_value(GArrowFixedSizeBinaryArray *array,
+                                         gint64 i)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_binary_array =
+    std::static_pointer_cast<arrow::FixedSizeBinaryArray>(arrow_array);
+  auto value = arrow_binary_array->GetValue(i);
+  /* The bytes are owned by the array, not by the GBytes. Take a
+   * reference on the array and drop it when the GBytes is freed so
+   * the pointer cannot dangle. g_bytes_new_static() would not keep
+   * the array alive. */
+  return g_bytes_new_with_free_func(value,
+                                    arrow_binary_array->byte_width(),
+                                    g_object_unref,
+                                    g_object_ref(array));
+}
+
+/**
+ * garrow_fixed_size_binary_array_get_values_bytes:
+ * @array: A #GArrowFixedSizeBinaryArray.
+ *
+ * The returned #GBytes does not copy the values. It points into the
+ * memory of @array and holds a reference on @array until the #GBytes
+ * is freed.
+ *
+ * Returns: (transfer full): All values as a #GBytes.
+ *
+ * Since: 3.0.0
+ */
+GBytes *
+garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_binary_array =
+    std::static_pointer_cast<arrow::FixedSizeBinaryArray>(arrow_array);
+  auto value = arrow_binary_array->raw_values();
+  /* Same ownership rule as garrow_fixed_size_binary_array_get_value():
+   * pin the array for as long as the GBytes exists. */
+  return g_bytes_new_with_free_func(value,
+                                    arrow_binary_array->byte_width() *
+                                    arrow_array->length(),
+                                    g_object_unref,
+                                    g_object_ref(array));
+}
+
+
+G_DEFINE_TYPE(GArrowDecimal128Array,
+              garrow_decimal128_array,
+              GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY)
+static void
+garrow_decimal128_array_init(GArrowDecimal128Array *object)
+{
+}
+
+static void
+garrow_decimal128_array_class_init(GArrowDecimal128ArrayClass *klass)
+{
+}
+
+/**
+ * garrow_decimal128_array_format_value:
+ * @array: A #GArrowDecimal128Array.
+ * @i: The index of the target value.
+ *
+ * Returns: (transfer full): The formatted @i-th value.
+ *
+ * It should be freed with g_free() when no longer needed.
+ *
+ * Since: 0.10.0
+ */
+gchar *
+garrow_decimal128_array_format_value(GArrowDecimal128Array *array,
+                                     gint64 i)
+{
+  auto arrow_decimal128_array =
+    std::static_pointer_cast<arrow::Decimal128Array>(
+      garrow_array_get_raw(GARROW_ARRAY(array)));
+  auto formatted = arrow_decimal128_array->FormatValue(i);
+  /* Copy into a g_free()-able buffer for the caller. */
+  return g_strndup(formatted.data(), formatted.size());
+}
+
+/**
+ * garrow_decimal128_array_get_value:
+ * @array: A #GArrowDecimal128Array.
+ * @i: The index of the value to fetch.
+ *
+ * Returns: (transfer full): The @i-th value.
+ *
+ * Since: 0.10.0
+ */
+GArrowDecimal128 *
+garrow_decimal128_array_get_value(GArrowDecimal128Array *array,
+                                  gint64 i)
+{
+  auto arrow_decimal128_array =
+    std::static_pointer_cast<arrow::Decimal128Array>(
+      garrow_array_get_raw(GARROW_ARRAY(array)));
+  auto raw_value = arrow_decimal128_array->GetValue(i);
+  auto arrow_decimal128 = std::make_shared<arrow::Decimal128>(raw_value);
+  return garrow_decimal128_new_raw(&arrow_decimal128);
+}
+
+
+G_DEFINE_TYPE(GArrowDecimal256Array,
+              garrow_decimal256_array,
+              GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY)
+static void
+garrow_decimal256_array_init(GArrowDecimal256Array *object)
+{
+  /* Nothing to set up per instance. */
+}
+
+static void
+garrow_decimal256_array_class_init(GArrowDecimal256ArrayClass *klass)
+{
+  /* No class-level overrides. */
+}
+
+/**
+ * garrow_decimal256_array_format_value:
+ * @array: A #GArrowDecimal256Array.
+ * @i: The index of the target value.
+ *
+ * Returns: (transfer full): The formatted @i-th value.
+ *
+ * It should be freed with g_free() when no longer needed.
+ * + * Since: 3.0.0 + */ +gchar * +garrow_decimal256_array_format_value(GArrowDecimal256Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal256_array = + std::static_pointer_cast<arrow::Decimal256Array>(arrow_array); + auto value = arrow_decimal256_array->FormatValue(i); + return g_strndup(value.data(), value.size()); +} + +/** + * garrow_decimal256_array_get_value: + * @array: A #GArrowDecimal256Array. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_array_get_value(GArrowDecimal256Array *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_decimal256_array = + std::static_pointer_cast<arrow::Decimal256Array>(arrow_array); + auto arrow_decimal256 = + std::make_shared<arrow::Decimal256>(arrow_decimal256_array->GetValue(i)); + return garrow_decimal256_new_raw(&arrow_decimal256); +} + + +typedef struct GArrowExtensionArrayPrivate_ { + GArrowArray *storage; +} GArrowExtensionArrayPrivate; + +enum { + PROP_STORAGE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExtensionArray, + garrow_extension_array, + GARROW_TYPE_ARRAY) + +#define GARROW_EXTENSION_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowExtensionArrayPrivate *>( \ + garrow_extension_array_get_instance_private( \ + GARROW_EXTENSION_ARRAY(obj))) + +static void +garrow_extension_array_dispose(GObject *object) +{ + auto priv = GARROW_EXTENSION_ARRAY_GET_PRIVATE(object); + + if (priv->storage) { + g_object_unref(priv->storage); + priv->storage = NULL; + } + + G_OBJECT_CLASS(garrow_extension_array_parent_class)->dispose(object); +} + +static void +garrow_extension_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXTENSION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STORAGE: + priv->storage = 
GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extension_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXTENSION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STORAGE: + g_value_set_object(value, priv->storage); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extension_array_init(GArrowExtensionArray *object) +{ +} + +static void +garrow_extension_array_class_init(GArrowExtensionArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_extension_array_dispose; + gobject_class->set_property = garrow_extension_array_set_property; + gobject_class->get_property = garrow_extension_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("storage", + "storage", + "The storage array", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_STORAGE, spec); +} + +/** + * garrow_extension_array_get_storage: + * @array: A #GArrowExtensionArray. + * + * Returns: (transfer full): The underlying storage of the array. 
+ * + * Since: 3.0.0 + */ +GArrowArray * +garrow_extension_array_get_storage(GArrowExtensionArray *array) +{ + auto priv = GARROW_EXTENSION_ARRAY_GET_PRIVATE(array); + if (priv->storage) { + g_object_ref(priv->storage); + return priv->storage; + } + + auto array_priv = GARROW_ARRAY_GET_PRIVATE(array); + return garrow_array_new_raw(&(array_priv->array)); +} + + +G_END_DECLS + +arrow::EqualOptions * +garrow_equal_options_get_raw(GArrowEqualOptions *equal_options) +{ + auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(equal_options); + return &(priv->options); +} + +GArrowArray * +garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array) +{ + return garrow_array_new_raw(arrow_array, + "array", arrow_array, + NULL); +} + +GArrowArray * +garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array, + const gchar *first_property_name, + ...) +{ + va_list args; + va_start(args, first_property_name); + auto array = garrow_array_new_raw_valist(arrow_array, + first_property_name, + args); + va_end(args); + return array; +} + +GArrowArray * +garrow_array_new_raw_valist(std::shared_ptr<arrow::Array> *arrow_array, + const gchar *first_property_name, + va_list args) +{ + GType type; + + switch ((*arrow_array)->type_id()) { + case arrow::Type::type::NA: + type = GARROW_TYPE_NULL_ARRAY; + break; + case arrow::Type::type::BOOL: + type = GARROW_TYPE_BOOLEAN_ARRAY; + break; + case arrow::Type::type::UINT8: + type = GARROW_TYPE_UINT8_ARRAY; + break; + case arrow::Type::type::INT8: + type = GARROW_TYPE_INT8_ARRAY; + break; + case arrow::Type::type::UINT16: + type = GARROW_TYPE_UINT16_ARRAY; + break; + case arrow::Type::type::INT16: + type = GARROW_TYPE_INT16_ARRAY; + break; + case arrow::Type::type::UINT32: + type = GARROW_TYPE_UINT32_ARRAY; + break; + case arrow::Type::type::INT32: + type = GARROW_TYPE_INT32_ARRAY; + break; + case arrow::Type::type::UINT64: + type = GARROW_TYPE_UINT64_ARRAY; + break; + case arrow::Type::type::INT64: + type = GARROW_TYPE_INT64_ARRAY; + break; + 
case arrow::Type::type::FLOAT: + type = GARROW_TYPE_FLOAT_ARRAY; + break; + case arrow::Type::type::DOUBLE: + type = GARROW_TYPE_DOUBLE_ARRAY; + break; + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_ARRAY; + break; + case arrow::Type::type::LARGE_BINARY: + type = GARROW_TYPE_LARGE_BINARY_ARRAY; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_ARRAY; + break; + case arrow::Type::type::LARGE_STRING: + type = GARROW_TYPE_LARGE_STRING_ARRAY; + break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY; + break; + case arrow::Type::type::DATE32: + type = GARROW_TYPE_DATE32_ARRAY; + break; + case arrow::Type::type::DATE64: + type = GARROW_TYPE_DATE64_ARRAY; + break; + case arrow::Type::type::TIMESTAMP: + type = GARROW_TYPE_TIMESTAMP_ARRAY; + break; + case arrow::Type::type::TIME32: + type = GARROW_TYPE_TIME32_ARRAY; + break; + case arrow::Type::type::TIME64: + type = GARROW_TYPE_TIME64_ARRAY; + break; + case arrow::Type::type::LIST: + type = GARROW_TYPE_LIST_ARRAY; + break; + case arrow::Type::type::LARGE_LIST: + type = GARROW_TYPE_LARGE_LIST_ARRAY; + break; + case arrow::Type::type::STRUCT: + type = GARROW_TYPE_STRUCT_ARRAY; + break; + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_ARRAY; + break; + case arrow::Type::type::SPARSE_UNION: + type = GARROW_TYPE_SPARSE_UNION_ARRAY; + break; + case arrow::Type::type::DENSE_UNION: + type = GARROW_TYPE_DENSE_UNION_ARRAY; + break; + case arrow::Type::type::DICTIONARY: + type = GARROW_TYPE_DICTIONARY_ARRAY; + break; + case arrow::Type::type::DECIMAL128: + type = GARROW_TYPE_DECIMAL128_ARRAY; + break; + case arrow::Type::type::DECIMAL256: + type = GARROW_TYPE_DECIMAL256_ARRAY; + break; + case arrow::Type::type::EXTENSION: + { + auto arrow_data_type = (*arrow_array)->type(); + auto arrow_gextension_data_type = + std::static_pointer_cast<garrow::GExtensionType>(arrow_data_type); + if (arrow_gextension_data_type) { + type = 
arrow_gextension_data_type->array_gtype(); + } else { + type = GARROW_TYPE_EXTENSION_ARRAY; + } + } + break; + default: + type = GARROW_TYPE_ARRAY; + break; + } + return GARROW_ARRAY(g_object_new_valist(type, + first_property_name, + args)); +} + +GArrowExtensionArray * +garrow_extension_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array, + GArrowArray *storage) +{ + auto array = garrow_array_new_raw(arrow_array, + "array", arrow_array, + "storage", storage, + NULL); + return GARROW_EXTENSION_ARRAY(array); +} + +std::shared_ptr<arrow::Array> +garrow_array_get_raw(GArrowArray *array) +{ + auto priv = GARROW_ARRAY_GET_PRIVATE(array); + return priv->array; +} diff --git a/src/arrow/c_glib/arrow-glib/basic-array.h b/src/arrow/c_glib/arrow-glib/basic-array.h new file mode 100644 index 000000000..15adbbeea --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-array.h @@ -0,0 +1,698 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/basic-data-type.h> +#include <arrow-glib/buffer.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowEqualOptions, + garrow_equal_options, + GARROW, + EQUAL_OPTIONS, + GObject) +struct _GArrowEqualOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowEqualOptions * +garrow_equal_options_new(void); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_equal_options_is_approx(GArrowEqualOptions *options); + + +#define GARROW_TYPE_ARRAY (garrow_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowArray, + garrow_array, + GARROW, + ARRAY, + GObject) +struct _GArrowArrayClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowArray * +garrow_array_import(gpointer c_abi_array, + GArrowDataType *data_type, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +gboolean +garrow_array_export(GArrowArray *array, + gpointer *c_abi_array, + gpointer *c_abi_schema, + GError **error); + +gboolean garrow_array_equal (GArrowArray *array, + GArrowArray *other_array); +GARROW_AVAILABLE_IN_5_0 +gboolean garrow_array_equal_options(GArrowArray *array, + GArrowArray *other_array, + GArrowEqualOptions *options); +gboolean garrow_array_equal_approx(GArrowArray *array, + GArrowArray *other_array); +gboolean garrow_array_equal_range (GArrowArray *array, + gint64 start_index, + GArrowArray *other_array, + gint64 other_start_index, + gint64 end_index, + GArrowEqualOptions *options); + +gboolean garrow_array_is_null (GArrowArray *array, + gint64 i); +gboolean garrow_array_is_valid (GArrowArray *array, + gint64 i); +gint64 garrow_array_get_length (GArrowArray *array); +gint64 garrow_array_get_offset (GArrowArray *array); +gint64 garrow_array_get_n_nulls (GArrowArray *array); +GArrowBuffer *garrow_array_get_null_bitmap(GArrowArray *array); +GArrowDataType *garrow_array_get_value_data_type(GArrowArray *array); +GArrowType 
garrow_array_get_value_type(GArrowArray *array); +GArrowArray *garrow_array_slice (GArrowArray *array, + gint64 offset, + gint64 length); +gchar *garrow_array_to_string (GArrowArray *array, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowArray *garrow_array_view(GArrowArray *array, + GArrowDataType *return_type, + GError **error); +GARROW_AVAILABLE_IN_0_15 +gchar *garrow_array_diff_unified(GArrowArray *array, + GArrowArray *other_array); +GARROW_AVAILABLE_IN_4_0 +GArrowArray *garrow_array_concatenate(GArrowArray *array, + GList *other_arrays, + GError **error); + + +#define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNullArray, + garrow_null_array, + GARROW, + NULL_ARRAY, + GArrowArray) +struct _GArrowNullArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowNullArray *garrow_null_array_new(gint64 length); + + +#define GARROW_TYPE_PRIMITIVE_ARRAY (garrow_primitive_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowPrimitiveArray, + garrow_primitive_array, + GARROW, + PRIMITIVE_ARRAY, + GArrowArray) +struct _GArrowPrimitiveArrayClass +{ + GArrowArrayClass parent_class; +}; + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_1_0_FOR(garrow_primitive_array_get_data_buffer) +GArrowBuffer * +garrow_primitive_array_get_buffer(GArrowPrimitiveArray *array); +#endif +GARROW_AVAILABLE_IN_1_0 +GArrowBuffer * +garrow_primitive_array_get_data_buffer(GArrowPrimitiveArray *array); + + +#define GARROW_TYPE_BOOLEAN_ARRAY (garrow_boolean_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBooleanArray, + garrow_boolean_array, + GARROW, + BOOLEAN_ARRAY, + GArrowPrimitiveArray) +struct _GArrowBooleanArrayClass +{ + GArrowPrimitiveArrayClass parent_class; +}; + +GArrowBooleanArray *garrow_boolean_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gboolean garrow_boolean_array_get_value (GArrowBooleanArray *array, + gint64 i); +gboolean 
*garrow_boolean_array_get_values(GArrowBooleanArray *array, + gint64 *length); + +#define GARROW_TYPE_NUMERIC_ARRAY (garrow_numeric_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNumericArray, + garrow_numeric_array, + GARROW, + NUMERIC_ARRAY, + GArrowPrimitiveArray) +struct _GArrowNumericArrayClass +{ + GArrowPrimitiveArrayClass parent_class; +}; + + +#define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt8Array, + garrow_int8_array, + GARROW, + INT8_ARRAY, + GArrowNumericArray) +struct _GArrowInt8ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowInt8Array *garrow_int8_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint8 garrow_int8_array_get_value(GArrowInt8Array *array, + gint64 i); +const gint8 *garrow_int8_array_get_values(GArrowInt8Array *array, + gint64 *length); + + +#define GARROW_TYPE_UINT8_ARRAY (garrow_uint8_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt8Array, + garrow_uint8_array, + GARROW, + UINT8_ARRAY, + GArrowNumericArray) +struct _GArrowUInt8ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowUInt8Array *garrow_uint8_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +guint8 garrow_uint8_array_get_value(GArrowUInt8Array *array, + gint64 i); +const guint8 *garrow_uint8_array_get_values(GArrowUInt8Array *array, + gint64 *length); + + +#define GARROW_TYPE_INT16_ARRAY (garrow_int16_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt16Array, + garrow_int16_array, + GARROW, + INT16_ARRAY, + GArrowNumericArray) +struct _GArrowInt16ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowInt16Array *garrow_int16_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint16 garrow_int16_array_get_value(GArrowInt16Array *array, + gint64 i); +const gint16 *garrow_int16_array_get_values(GArrowInt16Array *array, + gint64 
*length); + + +#define GARROW_TYPE_UINT16_ARRAY (garrow_uint16_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt16Array, + garrow_uint16_array, + GARROW, + UINT16_ARRAY, + GArrowNumericArray) +struct _GArrowUInt16ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowUInt16Array *garrow_uint16_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +guint16 garrow_uint16_array_get_value(GArrowUInt16Array *array, + gint64 i); +const guint16 *garrow_uint16_array_get_values(GArrowUInt16Array *array, + gint64 *length); + + +#define GARROW_TYPE_INT32_ARRAY (garrow_int32_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt32Array, + garrow_int32_array, + GARROW, + INT32_ARRAY, + GArrowNumericArray) +struct _GArrowInt32ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowInt32Array *garrow_int32_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint32 garrow_int32_array_get_value(GArrowInt32Array *array, + gint64 i); +const gint32 *garrow_int32_array_get_values(GArrowInt32Array *array, + gint64 *length); + + +#define GARROW_TYPE_UINT32_ARRAY (garrow_uint32_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt32Array, + garrow_uint32_array, + GARROW, + UINT32_ARRAY, + GArrowNumericArray) +struct _GArrowUInt32ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowUInt32Array *garrow_uint32_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +guint32 garrow_uint32_array_get_value(GArrowUInt32Array *array, + gint64 i); +const guint32 *garrow_uint32_array_get_values(GArrowUInt32Array *array, + gint64 *length); + + +#define GARROW_TYPE_INT64_ARRAY (garrow_int64_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt64Array, + garrow_int64_array, + GARROW, + INT64_ARRAY, + GArrowNumericArray) +struct _GArrowInt64ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowInt64Array 
*garrow_int64_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint64 garrow_int64_array_get_value(GArrowInt64Array *array, + gint64 i); +const gint64 *garrow_int64_array_get_values(GArrowInt64Array *array, + gint64 *length); + + +#define GARROW_TYPE_UINT64_ARRAY (garrow_uint64_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt64Array, + garrow_uint64_array, + GARROW, + UINT64_ARRAY, + GArrowNumericArray) +struct _GArrowUInt64ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowUInt64Array *garrow_uint64_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +guint64 garrow_uint64_array_get_value(GArrowUInt64Array *array, + gint64 i); +const guint64 *garrow_uint64_array_get_values(GArrowUInt64Array *array, + gint64 *length); + + +#define GARROW_TYPE_FLOAT_ARRAY (garrow_float_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFloatArray, + garrow_float_array, + GARROW, + FLOAT_ARRAY, + GArrowNumericArray) +struct _GArrowFloatArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowFloatArray *garrow_float_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gfloat garrow_float_array_get_value(GArrowFloatArray *array, + gint64 i); +const gfloat *garrow_float_array_get_values(GArrowFloatArray *array, + gint64 *length); + + +#define GARROW_TYPE_DOUBLE_ARRAY (garrow_double_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDoubleArray, + garrow_double_array, + GARROW, + DOUBLE_ARRAY, + GArrowNumericArray) +struct _GArrowDoubleArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowDoubleArray *garrow_double_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gdouble garrow_double_array_get_value(GArrowDoubleArray *array, + gint64 i); +const gdouble *garrow_double_array_get_values(GArrowDoubleArray *array, + gint64 *length); + + +#define 
GARROW_TYPE_BINARY_ARRAY (garrow_binary_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryArray, + garrow_binary_array, + GARROW, + BINARY_ARRAY, + GArrowArray) +struct _GArrowBinaryArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowBinaryArray *garrow_binary_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GBytes *garrow_binary_array_get_value(GArrowBinaryArray *array, + gint64 i); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_1_0_FOR(garrow_binary_array_get_data_buffer) +GArrowBuffer *garrow_binary_array_get_buffer(GArrowBinaryArray *array); +#endif +GARROW_AVAILABLE_IN_1_0 +GArrowBuffer *garrow_binary_array_get_data_buffer(GArrowBinaryArray *array); +GArrowBuffer *garrow_binary_array_get_offsets_buffer(GArrowBinaryArray *array); + + +#define GARROW_TYPE_LARGE_BINARY_ARRAY (garrow_large_binary_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArray, + garrow_large_binary_array, + GARROW, + LARGE_BINARY_ARRAY, + GArrowArray) +struct _GArrowLargeBinaryArrayClass +{ + GArrowArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeBinaryArray *garrow_large_binary_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GARROW_AVAILABLE_IN_0_16 +GBytes *garrow_large_binary_array_get_value(GArrowLargeBinaryArray *array, + gint64 i); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_1_0_FOR(garrow_large_binary_array_get_data_buffer) +GARROW_AVAILABLE_IN_0_16 +GArrowBuffer * +garrow_large_binary_array_get_buffer(GArrowLargeBinaryArray *array); +#endif +GARROW_AVAILABLE_IN_1_0 +GArrowBuffer * +garrow_large_binary_array_get_data_buffer(GArrowLargeBinaryArray *array); +GARROW_AVAILABLE_IN_0_16 +GArrowBuffer *garrow_large_binary_array_get_offsets_buffer(GArrowLargeBinaryArray *array); + + +#define GARROW_TYPE_STRING_ARRAY (garrow_string_array_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowStringArray, + garrow_string_array, + GARROW, + STRING_ARRAY, + GArrowBinaryArray) +struct _GArrowStringArrayClass +{ + GArrowBinaryArrayClass parent_class; +}; + +GArrowStringArray *garrow_string_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gchar *garrow_string_array_get_string(GArrowStringArray *array, + gint64 i); + + +#define GARROW_TYPE_LARGE_STRING_ARRAY (garrow_large_string_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArray, + garrow_large_string_array, + GARROW, + LARGE_STRING_ARRAY, + GArrowLargeBinaryArray) +struct _GArrowLargeStringArrayClass +{ + GArrowLargeBinaryArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeStringArray *garrow_large_string_array_new(gint64 length, + GArrowBuffer *value_offsets, + GArrowBuffer *value_data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GARROW_AVAILABLE_IN_0_16 +gchar *garrow_large_string_array_get_string(GArrowLargeStringArray *array, + gint64 i); + + +#define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate32Array, + garrow_date32_array, + GARROW, + DATE32_ARRAY, + GArrowNumericArray) +struct _GArrowDate32ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowDate32Array *garrow_date32_array_new(gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint32 garrow_date32_array_get_value(GArrowDate32Array *array, + gint64 i); +const gint32 *garrow_date32_array_get_values(GArrowDate32Array *array, + gint64 *length); + + +#define GARROW_TYPE_DATE64_ARRAY (garrow_date64_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate64Array, + garrow_date64_array, + GARROW, + DATE64_ARRAY, + GArrowNumericArray) +struct _GArrowDate64ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowDate64Array *garrow_date64_array_new(gint64 length, + GArrowBuffer *data, + 
GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint64 garrow_date64_array_get_value(GArrowDate64Array *array, + gint64 i); +const gint64 *garrow_date64_array_get_values(GArrowDate64Array *array, + gint64 *length); + + +#define GARROW_TYPE_TIMESTAMP_ARRAY (garrow_timestamp_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArray, + garrow_timestamp_array, + GARROW, + TIMESTAMP_ARRAY, + GArrowNumericArray) +struct _GArrowTimestampArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowTimestampArray *garrow_timestamp_array_new(GArrowTimestampDataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint64 garrow_timestamp_array_get_value(GArrowTimestampArray *array, + gint64 i); +const gint64 *garrow_timestamp_array_get_values(GArrowTimestampArray *array, + gint64 *length); + + +#define GARROW_TYPE_TIME32_ARRAY (garrow_time32_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime32Array, + garrow_time32_array, + GARROW, + TIME32_ARRAY, + GArrowNumericArray) +struct _GArrowTime32ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowTime32Array *garrow_time32_array_new(GArrowTime32DataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint32 garrow_time32_array_get_value(GArrowTime32Array *array, + gint64 i); +const gint32 *garrow_time32_array_get_values(GArrowTime32Array *array, + gint64 *length); + + +#define GARROW_TYPE_TIME64_ARRAY (garrow_time64_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime64Array, + garrow_time64_array, + GARROW, + TIME64_ARRAY, + GArrowNumericArray) +struct _GArrowTime64ArrayClass +{ + GArrowNumericArrayClass parent_class; +}; + +GArrowTime64Array *garrow_time64_array_new(GArrowTime64DataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +gint64 garrow_time64_array_get_value(GArrowTime64Array *array, + gint64 i); +const gint64 
*garrow_time64_array_get_values(GArrowTime64Array *array, + gint64 *length); + + +#define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY (garrow_fixed_size_binary_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArray, + garrow_fixed_size_binary_array, + GARROW, + FIXED_SIZE_BINARY_ARRAY, + GArrowPrimitiveArray) +struct _GArrowFixedSizeBinaryArrayClass +{ + GArrowPrimitiveArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowFixedSizeBinaryArray * +garrow_fixed_size_binary_array_new(GArrowFixedSizeBinaryDataType *data_type, + gint64 length, + GArrowBuffer *data, + GArrowBuffer *null_bitmap, + gint64 n_nulls); +GARROW_AVAILABLE_IN_3_0 +gint32 +garrow_fixed_size_binary_array_get_byte_width(GArrowFixedSizeBinaryArray *array); +GARROW_AVAILABLE_IN_3_0 +GBytes * +garrow_fixed_size_binary_array_get_value(GArrowFixedSizeBinaryArray *array, + gint64 i); +GARROW_AVAILABLE_IN_3_0 +GBytes * +garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array); + + +#define GARROW_TYPE_DECIMAL128_ARRAY (garrow_decimal128_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Array, + garrow_decimal128_array, + GARROW, + DECIMAL128_ARRAY, + GArrowFixedSizeBinaryArray) +struct _GArrowDecimal128ArrayClass +{ + GArrowFixedSizeBinaryArrayClass parent_class; +}; + +gchar *garrow_decimal128_array_format_value(GArrowDecimal128Array *array, + gint64 i); +GArrowDecimal128 *garrow_decimal128_array_get_value(GArrowDecimal128Array *array, + gint64 i); + +#define GARROW_TYPE_DECIMAL256_ARRAY (garrow_decimal256_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Array, + garrow_decimal256_array, + GARROW, + DECIMAL256_ARRAY, + GArrowFixedSizeBinaryArray) +struct _GArrowDecimal256ArrayClass +{ + GArrowFixedSizeBinaryArrayClass parent_class; +}; + +gchar *garrow_decimal256_array_format_value(GArrowDecimal256Array *array, + gint64 i); +GArrowDecimal256 *garrow_decimal256_array_get_value(GArrowDecimal256Array *array, + gint64 i); + +#define 
GARROW_TYPE_EXTENSION_ARRAY (garrow_extension_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExtensionArray, + garrow_extension_array, + GARROW, + EXTENSION_ARRAY, + GArrowArray) +struct _GArrowExtensionArrayClass +{ + GArrowArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowArray * +garrow_extension_array_get_storage(GArrowExtensionArray *array); + + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/basic-array.hpp b/src/arrow/c_glib/arrow-glib/basic-array.hpp new file mode 100644 index 000000000..3ef1c1969 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-array.hpp @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/basic-array.h> + +arrow::EqualOptions * +garrow_equal_options_get_raw(GArrowEqualOptions *equal_options); + +GArrowArray * +garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array); +GArrowArray * +garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array, + const gchar *first_property_name, + ...); +GArrowArray * +garrow_array_new_raw_valist(std::shared_ptr<arrow::Array> *arrow_array, + const gchar *first_property_name, + va_list args); +GArrowExtensionArray * +garrow_extension_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array, + GArrowArray *storage); +std::shared_ptr<arrow::Array> +garrow_array_get_raw(GArrowArray *array); + +template <typename DataType> +inline std::shared_ptr<typename arrow::TypeTraits<DataType>::ArrayType> +garrow_array_get_raw(GArrowArray *array) { + auto arrow_array = garrow_array_get_raw(array); + return std::static_pointer_cast<typename arrow::TypeTraits<DataType>::ArrayType>(arrow_array); +} diff --git a/src/arrow/c_glib/arrow-glib/basic-data-type.cpp b/src/arrow/c_glib/arrow-glib/basic-data-type.cpp new file mode 100644 index 000000000..47ff79e61 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-data-type.cpp @@ -0,0 +1,2041 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/enums.h> +#include <arrow-glib/error.hpp> +#include <arrow-glib/field.hpp> +#include <arrow-glib/type.hpp> + +#include <arrow/c/bridge.h> + +G_BEGIN_DECLS + +/** + * SECTION: basic-data-type + * @section_id: basic-data-type-classes + * @title: Basic data type classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowDataType is a base class for all data type classes such as + * #GArrowBooleanDataType. + * + * #GArrowNullDataType is a class for the null data type. + * + * #GArrowBooleanDataType is a class for the boolean data type. + * + * #GArrowInt8DataType is a class for the 8-bit integer data type. + * + * #GArrowUInt8DataType is a class for the 8-bit unsigned integer data type. + * + * #GArrowInt16DataType is a class for the 16-bit integer data type. + * + * #GArrowUInt16DataType is a class for the 16-bit unsigned integer data type. + * + * #GArrowInt32DataType is a class for the 32-bit integer data type. + * + * #GArrowUInt32DataType is a class for the 32-bit unsigned integer data type. + * + * #GArrowInt64DataType is a class for the 64-bit integer data type. + * + * #GArrowUInt64DataType is a class for the 64-bit unsigned integer data type. + * + * #GArrowFloatDataType is a class for the 32-bit floating point data + * type. + * + * #GArrowDoubleDataType is a class for the 64-bit floating point data + * type. + * + * #GArrowBinaryDataType is a class for the binary data type. + * + * #GArrowLargeBinaryDataType is a class for the 64-bit offsets binary + * data type. + * + * #GArrowFixedSizeBinaryDataType is a class for the fixed-size binary + * data type. + * + * #GArrowStringDataType is a class for the UTF-8 encoded string data + * type. 
+ * + * #GArrowLargeStringDataType is a class for the 64-bit offsets UTF-8 + * encoded string data type. + * + * #GArrowDate32DataType is a class for the number of days since UNIX + * epoch in the 32-bit signed integer data type. + * + * #GArrowDate64DataType is a class for the number of milliseconds + * since UNIX epoch in the 64-bit signed integer data type. + * + * #GArrowTimestampDataType is a class for the number of + * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in + * the 64-bit signed integer data type. + * + * #GArrowTime32DataType is a class for the number of seconds or + * milliseconds since midnight in the 32-bit signed integer data type. + * + * #GArrowTime64DataType is a class for the number of microseconds or + * nanoseconds since midnight in the 64-bit signed integer data type. + * + * #GArrowDecimalDataType is a base class for the decimal data types. + * + * #GArrowDecimal128DataType is a class for the 128-bit decimal data type. + * + * #GArrowDecimal256DataType is a class for the 256-bit decimal data type. + * + * #GArrowExtensionDataType is a base class for user-defined extension + * data types. + * + * #GArrowExtensionDataTypeRegistry is a class to manage extension + * data types. 
+ */ + +/* Instance-private data: the wrapped Arrow C++ data type. */ +typedef struct GArrowDataTypePrivate_ { + std::shared_ptr<arrow::DataType> data_type; +} GArrowDataTypePrivate; + +/* GObject property IDs. */ +enum { + PROP_DATA_TYPE = 1 +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowDataType, + garrow_data_type, + G_TYPE_OBJECT) + +#define GARROW_DATA_TYPE_GET_PRIVATE(obj) \ + static_cast<GArrowDataTypePrivate *>( \ + garrow_data_type_get_instance_private( \ + GARROW_DATA_TYPE(obj))) + +/* Explicitly runs the destructor of the shared_ptr that + * garrow_data_type_init() placement-constructs in the private data, + * then chains up to the parent class. */ +static void +garrow_data_type_finalize(GObject *object) +{ + auto priv = GARROW_DATA_TYPE_GET_PRIVATE(object); + + priv->data_type.~shared_ptr(); + + G_OBJECT_CLASS(garrow_data_type_parent_class)->finalize(object); +} + +/* Handles the "data-type" property: the GValue carries a pointer to a + * std::shared_ptr<arrow::DataType>, which is copied into the private + * data when the pointer is non-NULL. */ +static void +garrow_data_type_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DATA_TYPE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATA_TYPE: + { + auto data_type = g_value_get_pointer(value); + if (data_type) { + priv->data_type = + *static_cast<std::shared_ptr<arrow::DataType> *>(data_type); + } + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* No property is readable here: every ID is reported as invalid. */ +static void +garrow_data_type_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Placement-constructs an empty shared_ptr in the private data. */ +static void +garrow_data_type_init(GArrowDataType *object) +{ + auto priv = GARROW_DATA_TYPE_GET_PRIVATE(object); + new(&priv->data_type) std::shared_ptr<arrow::DataType>; +} + +/* Installs the finalize/set_property/get_property handlers and the + * "data-type" pointer property. */ +static void +garrow_data_type_class_init(GArrowDataTypeClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_data_type_finalize; + gobject_class->set_property = garrow_data_type_set_property; + gobject_class->get_property = garrow_data_type_get_property; + + spec = g_param_spec_pointer("data-type", + "Data type", + "The raw std::shared_ptr<arrow::DataType> *", + 
static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); +} + +/** + * garrow_data_type_import: + * @c_abi_schema: (not nullable): A `struct ArrowSchema *`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An imported #GArrowDataType on success, + * %NULL on error. + * + * You don't need to release the passed `struct ArrowSchema *`, + * even if this function reports an error. + * + * Since: 6.0.0 + */ +GArrowDataType * +garrow_data_type_import(gpointer c_abi_schema, GError **error) +{ + auto arrow_data_type_result = + arrow::ImportType(static_cast<ArrowSchema *>(c_abi_schema)); + if (garrow::check(error, arrow_data_type_result, "[data-type][import]")) { + return garrow_data_type_new_raw(&(*arrow_data_type_result)); + } else { + return NULL; + } +} + +/** + * garrow_data_type_export: + * @data_type: A #GArrowDataType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An exported #GArrowDataType as + * `struct ArrowSchema *` on success, %NULL on error. + * + * It should be freed with the `ArrowSchema::release` callback then + * g_free() when no longer needed. + * + * Since: 6.0.0 + */ +gpointer +garrow_data_type_export(GArrowDataType *data_type, GError **error) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto c_abi_schema = g_new(ArrowSchema, 1); + auto status = arrow::ExportType(*arrow_data_type, c_abi_schema); + if (garrow::check(error, status, "[data-type][export]")) { + return c_abi_schema; + } else { + g_free(c_abi_schema); + return NULL; + } +} + +/** + * garrow_data_type_equal: + * @data_type: A #GArrowDataType. + * @other_data_type: A #GArrowDataType to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. 
+ */ +gboolean +garrow_data_type_equal(GArrowDataType *data_type, + GArrowDataType *other_data_type) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + const auto arrow_other_data_type = garrow_data_type_get_raw(other_data_type); + return arrow_data_type->Equals(arrow_other_data_type); +} + +/** + * garrow_data_type_to_string: + * @data_type: A #GArrowDataType. + * + * Returns: The string representation of the data type. + * + * It should be freed with g_free() when no longer needed. + */ +gchar * +garrow_data_type_to_string(GArrowDataType *data_type) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + return g_strdup(arrow_data_type->ToString().c_str()); +} + +/** + * garrow_data_type_get_id: + * @data_type: A #GArrowDataType. + * + * Returns: The #GArrowType of the data type. + */ +GArrowType +garrow_data_type_get_id(GArrowDataType *data_type) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + return garrow_type_from_raw(arrow_data_type->id()); +} + +/** + * garrow_data_type_get_name: + * @data_type: A #GArrowDataType. + * + * Returns: The name of the data type. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +garrow_data_type_get_name(GArrowDataType *data_type) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + return g_strdup(arrow_data_type->name().c_str()); +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowFixedWidthDataType, + garrow_fixed_width_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_fixed_width_data_type_init(GArrowFixedWidthDataType *object) +{ +} + +static void +garrow_fixed_width_data_type_class_init(GArrowFixedWidthDataTypeClass *klass) +{ +} + +/** + * garrow_fixed_width_data_type_get_bit_width: + * @data_type: A #GArrowFixedWidthDataType. + * + * Returns: The number of bits for one data. 
+ */ +gint +garrow_fixed_width_data_type_get_bit_width(GArrowFixedWidthDataType *data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto arrow_fixed_width_type = + std::static_pointer_cast<arrow::FixedWidthType>(arrow_data_type); + return arrow_fixed_width_type->bit_width(); +} + + +G_DEFINE_TYPE(GArrowNullDataType, + garrow_null_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_null_data_type_init(GArrowNullDataType *object) +{ +} + +static void +garrow_null_data_type_class_init(GArrowNullDataTypeClass *klass) +{ +} + +/** + * garrow_null_data_type_new: + * + * Returns: The newly created null data type. + */ +GArrowNullDataType * +garrow_null_data_type_new(void) +{ + auto arrow_data_type = arrow::null(); + + GArrowNullDataType *data_type = + GARROW_NULL_DATA_TYPE(g_object_new(GARROW_TYPE_NULL_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowBooleanDataType, + garrow_boolean_data_type, + GARROW_TYPE_FIXED_WIDTH_DATA_TYPE) + +static void +garrow_boolean_data_type_init(GArrowBooleanDataType *object) +{ +} + +static void +garrow_boolean_data_type_class_init(GArrowBooleanDataTypeClass *klass) +{ +} + +/** + * garrow_boolean_data_type_new: + * + * Returns: The newly created boolean data type. 
+ */ +GArrowBooleanDataType * +garrow_boolean_data_type_new(void) +{ + auto arrow_data_type = arrow::boolean(); + + GArrowBooleanDataType *data_type = + GARROW_BOOLEAN_DATA_TYPE(g_object_new(GARROW_TYPE_BOOLEAN_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowNumericDataType, + garrow_numeric_data_type, + GARROW_TYPE_FIXED_WIDTH_DATA_TYPE) + +static void +garrow_numeric_data_type_init(GArrowNumericDataType *object) +{ +} + +static void +garrow_numeric_data_type_class_init(GArrowNumericDataTypeClass *klass) +{ +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowIntegerDataType, + garrow_integer_data_type, + GARROW_TYPE_NUMERIC_DATA_TYPE) + +static void +garrow_integer_data_type_init(GArrowIntegerDataType *object) +{ +} + +static void +garrow_integer_data_type_class_init(GArrowIntegerDataTypeClass *klass) +{ +} + +/** + * garrow_integer_data_type_is_signed: + * @data_type: A #GArrowIntegerDataType. + * + * Returns: %TRUE if the data type is signed, %FALSE otherwise. + * + * Since: 0.16.0 + */ +gboolean +garrow_integer_data_type_is_signed(GArrowIntegerDataType *data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto arrow_integer_type = + std::static_pointer_cast<arrow::IntegerType>(arrow_data_type); + return arrow_integer_type->is_signed(); +} + +G_DEFINE_TYPE(GArrowInt8DataType, + garrow_int8_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_int8_data_type_init(GArrowInt8DataType *object) +{ +} + +static void +garrow_int8_data_type_class_init(GArrowInt8DataTypeClass *klass) +{ +} + +/** + * garrow_int8_data_type_new: + * + * Returns: The newly created 8-bit integer data type. 
+ */ +GArrowInt8DataType * +garrow_int8_data_type_new(void) +{ + auto arrow_data_type = arrow::int8(); + + GArrowInt8DataType *data_type = + GARROW_INT8_DATA_TYPE(g_object_new(GARROW_TYPE_INT8_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowUInt8DataType, + garrow_uint8_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_uint8_data_type_init(GArrowUInt8DataType *object) +{ +} + +static void +garrow_uint8_data_type_class_init(GArrowUInt8DataTypeClass *klass) +{ +} + +/** + * garrow_uint8_data_type_new: + * + * Returns: The newly created 8-bit unsigned integer data type. + */ +GArrowUInt8DataType * +garrow_uint8_data_type_new(void) +{ + auto arrow_data_type = arrow::uint8(); + + GArrowUInt8DataType *data_type = + GARROW_UINT8_DATA_TYPE(g_object_new(GARROW_TYPE_UINT8_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowInt16DataType, + garrow_int16_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_int16_data_type_init(GArrowInt16DataType *object) +{ +} + +static void +garrow_int16_data_type_class_init(GArrowInt16DataTypeClass *klass) +{ +} + +/** + * garrow_int16_data_type_new: + * + * Returns: The newly created 16-bit integer data type. + */ +GArrowInt16DataType * +garrow_int16_data_type_new(void) +{ + auto arrow_data_type = arrow::int16(); + + GArrowInt16DataType *data_type = + GARROW_INT16_DATA_TYPE(g_object_new(GARROW_TYPE_INT16_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowUInt16DataType, + garrow_uint16_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_uint16_data_type_init(GArrowUInt16DataType *object) +{ +} + +static void +garrow_uint16_data_type_class_init(GArrowUInt16DataTypeClass *klass) +{ +} + +/** + * garrow_uint16_data_type_new: + * + * Returns: The newly created 16-bit unsigned integer data type. 
+ */ +GArrowUInt16DataType * +garrow_uint16_data_type_new(void) +{ + auto arrow_data_type = arrow::uint16(); + + GArrowUInt16DataType *data_type = + GARROW_UINT16_DATA_TYPE(g_object_new(GARROW_TYPE_UINT16_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowInt32DataType, + garrow_int32_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_int32_data_type_init(GArrowInt32DataType *object) +{ +} + +static void +garrow_int32_data_type_class_init(GArrowInt32DataTypeClass *klass) +{ +} + +/** + * garrow_int32_data_type_new: + * + * Returns: The newly created 32-bit integer data type. + */ +GArrowInt32DataType * +garrow_int32_data_type_new(void) +{ + auto arrow_data_type = arrow::int32(); + + GArrowInt32DataType *data_type = + GARROW_INT32_DATA_TYPE(g_object_new(GARROW_TYPE_INT32_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowUInt32DataType, + garrow_uint32_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_uint32_data_type_init(GArrowUInt32DataType *object) +{ +} + +static void +garrow_uint32_data_type_class_init(GArrowUInt32DataTypeClass *klass) +{ +} + +/** + * garrow_uint32_data_type_new: + * + * Returns: The newly created 32-bit unsigned integer data type. + */ +GArrowUInt32DataType * +garrow_uint32_data_type_new(void) +{ + auto arrow_data_type = arrow::uint32(); + + GArrowUInt32DataType *data_type = + GARROW_UINT32_DATA_TYPE(g_object_new(GARROW_TYPE_UINT32_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowInt64DataType, + garrow_int64_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_int64_data_type_init(GArrowInt64DataType *object) +{ +} + +static void +garrow_int64_data_type_class_init(GArrowInt64DataTypeClass *klass) +{ +} + +/** + * garrow_int64_data_type_new: + * + * Returns: The newly created 64-bit integer data type. 
+ */ +GArrowInt64DataType * +garrow_int64_data_type_new(void) +{ + auto arrow_data_type = arrow::int64(); + + GArrowInt64DataType *data_type = + GARROW_INT64_DATA_TYPE(g_object_new(GARROW_TYPE_INT64_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowUInt64DataType, + garrow_uint64_data_type, + GARROW_TYPE_INTEGER_DATA_TYPE) + +static void +garrow_uint64_data_type_init(GArrowUInt64DataType *object) +{ +} + +static void +garrow_uint64_data_type_class_init(GArrowUInt64DataTypeClass *klass) +{ +} + +/** + * garrow_uint64_data_type_new: + * + * Returns: The newly created 64-bit unsigned integer data type. + */ +GArrowUInt64DataType * +garrow_uint64_data_type_new(void) +{ + auto arrow_data_type = arrow::uint64(); + + GArrowUInt64DataType *data_type = + GARROW_UINT64_DATA_TYPE(g_object_new(GARROW_TYPE_UINT64_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowFloatingPointDataType, + garrow_floating_point_data_type, + GARROW_TYPE_NUMERIC_DATA_TYPE) + +static void +garrow_floating_point_data_type_init(GArrowFloatingPointDataType *object) +{ +} + +static void +garrow_floating_point_data_type_class_init(GArrowFloatingPointDataTypeClass *klass) +{ +} + + +G_DEFINE_TYPE(GArrowFloatDataType, + garrow_float_data_type, + GARROW_TYPE_FLOATING_POINT_DATA_TYPE) + +static void +garrow_float_data_type_init(GArrowFloatDataType *object) +{ +} + +static void +garrow_float_data_type_class_init(GArrowFloatDataTypeClass *klass) +{ +} + +/** + * garrow_float_data_type_new: + * + * Returns: The newly created float data type. 
+ */ +GArrowFloatDataType * +garrow_float_data_type_new(void) +{ + auto arrow_data_type = arrow::float32(); + + GArrowFloatDataType *data_type = + GARROW_FLOAT_DATA_TYPE(g_object_new(GARROW_TYPE_FLOAT_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowDoubleDataType, + garrow_double_data_type, + GARROW_TYPE_FLOATING_POINT_DATA_TYPE) + +static void +garrow_double_data_type_init(GArrowDoubleDataType *object) +{ +} + +static void +garrow_double_data_type_class_init(GArrowDoubleDataTypeClass *klass) +{ +} + +/** + * garrow_double_data_type_new: + * + * Returns: The newly created 64-bit floating point data type. + */ +GArrowDoubleDataType * +garrow_double_data_type_new(void) +{ + auto arrow_data_type = arrow::float64(); + + GArrowDoubleDataType *data_type = + GARROW_DOUBLE_DATA_TYPE(g_object_new(GARROW_TYPE_DOUBLE_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowBinaryDataType, + garrow_binary_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_binary_data_type_init(GArrowBinaryDataType *object) +{ +} + +static void +garrow_binary_data_type_class_init(GArrowBinaryDataTypeClass *klass) +{ +} + +/** + * garrow_binary_data_type_new: + * + * Returns: The newly created binary data type. 
+ */ +GArrowBinaryDataType * +garrow_binary_data_type_new(void) +{ + auto arrow_data_type = arrow::binary(); + + GArrowBinaryDataType *data_type = + GARROW_BINARY_DATA_TYPE(g_object_new(GARROW_TYPE_BINARY_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW_TYPE_FIXED_WIDTH_DATA_TYPE) + +static void +garrow_fixed_size_binary_data_type_init(GArrowFixedSizeBinaryDataType *object) +{ +} + +static void +garrow_fixed_size_binary_data_type_class_init(GArrowFixedSizeBinaryDataTypeClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_data_type_new: + * @byte_width: The number of bytes of each value. + * + * Returns: The newly created fixed-size binary data type. + * + * Since: 0.12.0 + */ +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width) +{ + auto arrow_fixed_size_binary_data_type = arrow::fixed_size_binary(byte_width); + + auto fixed_size_binary_data_type = + GARROW_FIXED_SIZE_BINARY_DATA_TYPE(g_object_new(GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE, + "data-type", &arrow_fixed_size_binary_data_type, + NULL)); + return fixed_size_binary_data_type; +} + +/** + * garrow_fixed_size_binary_data_type_get_byte_width: + * @data_type: A #GArrowFixedSizeBinaryDataType. + * + * Returns: The number of bytes of each value.
+ * + * Since: 0.12.0 + */ +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto arrow_fixed_size_binary_type = + std::static_pointer_cast<arrow::FixedSizeBinaryType>(arrow_data_type); + return arrow_fixed_size_binary_type->byte_width(); +} + + +G_DEFINE_TYPE(GArrowLargeBinaryDataType, + garrow_large_binary_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_large_binary_data_type_init(GArrowLargeBinaryDataType *object) +{ +} + +static void +garrow_large_binary_data_type_class_init(GArrowLargeBinaryDataTypeClass *klass) +{ +} + +/** + * garrow_large_binary_data_type_new: + * + * Returns: The newly created #GArrowLargeBinaryDataType. + * + * Since: 0.17.0 + */ +GArrowLargeBinaryDataType * +garrow_large_binary_data_type_new(void) +{ + auto arrow_data_type = arrow::large_binary(); + + GArrowLargeBinaryDataType *data_type = + GARROW_LARGE_BINARY_DATA_TYPE(g_object_new(GARROW_TYPE_LARGE_BINARY_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowStringDataType, + garrow_string_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_string_data_type_init(GArrowStringDataType *object) +{ +} + +static void +garrow_string_data_type_class_init(GArrowStringDataTypeClass *klass) +{ +} + +/** + * garrow_string_data_type_new: + * + * Returns: The newly created UTF-8 encoded string data type. 
+ */ +GArrowStringDataType * +garrow_string_data_type_new(void) +{ + auto arrow_data_type = arrow::utf8(); + + GArrowStringDataType *data_type = + GARROW_STRING_DATA_TYPE(g_object_new(GARROW_TYPE_STRING_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowLargeStringDataType, + garrow_large_string_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_large_string_data_type_init(GArrowLargeStringDataType *object) +{ +} + +static void +garrow_large_string_data_type_class_init(GArrowLargeStringDataTypeClass *klass) +{ +} + +/** + * garrow_large_string_data_type_new: + * + * Returns: The newly created #GArrowLargeStringDataType. + * + * Since: 0.17.0 + */ +GArrowLargeStringDataType * +garrow_large_string_data_type_new(void) +{ + auto arrow_data_type = arrow::large_utf8(); + + GArrowLargeStringDataType *data_type = + GARROW_LARGE_STRING_DATA_TYPE(g_object_new(GARROW_TYPE_LARGE_STRING_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowDate32DataType, + garrow_date32_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_date32_data_type_init(GArrowDate32DataType *object) +{ +} + +static void +garrow_date32_data_type_class_init(GArrowDate32DataTypeClass *klass) +{ +} + +/** + * garrow_date32_data_type_new: + * + * Returns: A newly created data type that stores the number of days + * since the UNIX epoch as a 32-bit signed integer.
+ * + * Since: 0.7.0 + */ +GArrowDate32DataType * +garrow_date32_data_type_new(void) +{ + auto arrow_data_type = arrow::date32(); + + GArrowDate32DataType *data_type = + GARROW_DATE32_DATA_TYPE(g_object_new(GARROW_TYPE_DATE32_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowDate64DataType, + garrow_date64_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_date64_data_type_init(GArrowDate64DataType *object) +{ +} + +static void +garrow_date64_data_type_class_init(GArrowDate64DataTypeClass *klass) +{ +} + +/** + * garrow_date64_data_type_new: + * + * Returns: A newly created data type that stores the number of + * milliseconds since the UNIX epoch as a 64-bit signed integer. + * + * Since: 0.7.0 + */ +GArrowDate64DataType * +garrow_date64_data_type_new(void) +{ + auto arrow_data_type = arrow::date64(); + + GArrowDate64DataType *data_type = + GARROW_DATE64_DATA_TYPE(g_object_new(GARROW_TYPE_DATE64_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowTimestampDataType, + garrow_timestamp_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_timestamp_data_type_init(GArrowTimestampDataType *object) +{ +} + +static void +garrow_timestamp_data_type_class_init(GArrowTimestampDataTypeClass *klass) +{ +} + +/** + * garrow_timestamp_data_type_new: + * @unit: The unit of the timestamp data. + * + * Returns: A newly created data type that stores the number of + * seconds/milliseconds/microseconds/nanoseconds (as chosen by @unit) + * since the UNIX epoch as a 64-bit signed integer.
+ * + * Since: 0.7.0 + */ +GArrowTimestampDataType * +garrow_timestamp_data_type_new(GArrowTimeUnit unit) +{ + auto arrow_unit = garrow_time_unit_to_raw(unit); + auto arrow_data_type = arrow::timestamp(arrow_unit); + auto data_type = + GARROW_TIMESTAMP_DATA_TYPE(g_object_new(GARROW_TYPE_TIMESTAMP_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + +/** + * garrow_timestamp_data_type_get_unit: + * @timestamp_data_type: The #GArrowTimestampDataType. + * + * Returns: The unit of the timestamp data type. + * + * Since: 0.8.0 + */ +GArrowTimeUnit +garrow_timestamp_data_type_get_unit(GArrowTimestampDataType *timestamp_data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(timestamp_data_type)); + const auto arrow_timestamp_data_type = + std::static_pointer_cast<arrow::TimestampType>(arrow_data_type); + return garrow_time_unit_from_raw(arrow_timestamp_data_type->unit()); +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowTimeDataType, + garrow_time_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_time_data_type_init(GArrowTimeDataType *object) +{ +} + +static void +garrow_time_data_type_class_init(GArrowTimeDataTypeClass *klass) +{ +} + +/** + * garrow_time_data_type_get_unit: + * @time_data_type: The #GArrowTimeDataType. + * + * Returns: The unit of the time data type. 
+ * + * Since: 0.7.0 + */ +GArrowTimeUnit +garrow_time_data_type_get_unit(GArrowTimeDataType *time_data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(time_data_type)); + const auto arrow_time_data_type = + std::static_pointer_cast<arrow::TimeType>(arrow_data_type); + return garrow_time_unit_from_raw(arrow_time_data_type->unit()); +} + + +G_DEFINE_TYPE(GArrowTime32DataType, + garrow_time32_data_type, + GARROW_TYPE_TIME_DATA_TYPE) + +static void +garrow_time32_data_type_init(GArrowTime32DataType *object) +{ +} + +static void +garrow_time32_data_type_class_init(GArrowTime32DataTypeClass *klass) +{ +} + +/** + * garrow_time32_data_type_new: + * @unit: %GARROW_TIME_UNIT_SECOND or %GARROW_TIME_UNIT_MILLI. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * A newly created data type that stores the number of seconds or + * milliseconds since midnight as a 32-bit signed integer, or %NULL + * with @error set when @unit is any other unit. + * + * Since: 0.7.0 + */ +GArrowTime32DataType * +garrow_time32_data_type_new(GArrowTimeUnit unit, GError **error) +{ + switch (unit) { + case GARROW_TIME_UNIT_SECOND: + case GARROW_TIME_UNIT_MILLI: + break; + default: + { + auto enum_class = G_ENUM_CLASS(g_type_class_ref(GARROW_TYPE_TIME_UNIT)); + GEnumValue *value = g_enum_get_value(enum_class, unit); + if (value) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[time32-data-type][new] time unit must be second or milli: " + "<%s>", + value->value_nick); + } else { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[time32-data-type][new] " + "time unit must be second(%d) or milli(%d): <%d>", + GARROW_TIME_UNIT_SECOND, + GARROW_TIME_UNIT_MILLI, + unit); + } + g_type_class_unref(enum_class); + } + return NULL; + } + + auto arrow_unit = garrow_time_unit_to_raw(unit); + auto arrow_data_type = arrow::time32(arrow_unit); + auto data_type = + GARROW_TIME32_DATA_TYPE(g_object_new(GARROW_TYPE_TIME32_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); +
return data_type; +} + + +G_DEFINE_TYPE(GArrowTime64DataType, + garrow_time64_data_type, + GARROW_TYPE_TIME_DATA_TYPE) + +static void +garrow_time64_data_type_init(GArrowTime64DataType *object) +{ +} + +static void +garrow_time64_data_type_class_init(GArrowTime64DataTypeClass *klass) +{ +} + +/** + * garrow_time64_data_type_new: + * @unit: %GARROW_TIME_UNIT_MICRO or %GARROW_TIME_UNIT_NANO. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * A newly created data type that stores the number of microseconds + * or nanoseconds since midnight as a 64-bit signed integer, or %NULL + * with @error set when @unit is any other unit. + * + * Since: 0.7.0 + */ +GArrowTime64DataType * +garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error) +{ + switch (unit) { + case GARROW_TIME_UNIT_MICRO: + case GARROW_TIME_UNIT_NANO: + break; + default: + { + auto enum_class = G_ENUM_CLASS(g_type_class_ref(GARROW_TYPE_TIME_UNIT)); + auto value = g_enum_get_value(enum_class, unit); + if (value) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[time64-data-type][new] time unit must be micro or nano: " + "<%s>", + value->value_nick); + } else { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[time64-data-type][new] " + "time unit must be micro(%d) or nano(%d): <%d>", + GARROW_TIME_UNIT_MICRO, + GARROW_TIME_UNIT_NANO, + unit); + } + g_type_class_unref(enum_class); + } + return NULL; + } + + auto arrow_unit = garrow_time_unit_to_raw(unit); + auto arrow_data_type = arrow::time64(arrow_unit); + auto data_type = + GARROW_TIME64_DATA_TYPE(g_object_new(GARROW_TYPE_TIME64_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_ABSTRACT_TYPE(GArrowDecimalDataType, + garrow_decimal_data_type, + GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE) + +static void +garrow_decimal_data_type_init(GArrowDecimalDataType *object) +{ +} + +static void +garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass) +{ +} + +/** + * garrow_decimal_data_type_new: + *
@precision: The precision of decimal data. + * @scale: The scale of decimal data. + * + * Returns: The newly created decimal data type. + * + * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_decimal128_data_type_new() instead. + */ +GArrowDecimalDataType * +garrow_decimal_data_type_new(gint32 precision, + gint32 scale) +{ + auto decimal128_data_type = garrow_decimal128_data_type_new(precision, scale); + return GARROW_DECIMAL_DATA_TYPE(decimal128_data_type); +} + +/** + * garrow_decimal_data_type_get_precision: + * @decimal_data_type: The #GArrowDecimalDataType. + * + * Returns: The precision of the decimal data type. + * + * Since: 0.10.0 + */ +gint32 +garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(decimal_data_type)); + const auto arrow_decimal_type = + std::static_pointer_cast<arrow::DecimalType>(arrow_data_type); + return arrow_decimal_type->precision(); +} + +/** + * garrow_decimal_data_type_get_scale: + * @decimal_data_type: The #GArrowDecimalDataType. + * + * Returns: The scale of the decimal data type. + * + * Since: 0.10.0 + */ +gint32 +garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(decimal_data_type)); + const auto arrow_decimal_type = + std::static_pointer_cast<arrow::DecimalType>(arrow_data_type); + return arrow_decimal_type->scale(); +} + + +G_DEFINE_TYPE(GArrowDecimal128DataType, + garrow_decimal128_data_type, + GARROW_TYPE_DECIMAL_DATA_TYPE) + +static void +garrow_decimal128_data_type_init(GArrowDecimal128DataType *object) +{ +} + +static void +garrow_decimal128_data_type_class_init(GArrowDecimal128DataTypeClass *klass) +{ +} + +/** + * garrow_decimal128_data_type_max_precision: + * + * Returns: The max precision of 128-bit decimal data type. 
+ * + * Since: 3.0.0 + */ +gint32 +garrow_decimal128_data_type_max_precision() +{ + return arrow::Decimal128Type::kMaxPrecision; +} + +/** + * garrow_decimal128_data_type_new: + * @precision: The precision of decimal data. + * @scale: The scale of decimal data. + * + * Returns: The newly created 128-bit decimal data type. + * + * Since: 0.12.0 + */ +GArrowDecimal128DataType * +garrow_decimal128_data_type_new(gint32 precision, + gint32 scale) +{ + auto arrow_data_type = arrow::decimal128(precision, scale); + + auto data_type = + GARROW_DECIMAL128_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL128_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +G_DEFINE_TYPE(GArrowDecimal256DataType, + garrow_decimal256_data_type, + GARROW_TYPE_DECIMAL_DATA_TYPE) + +static void +garrow_decimal256_data_type_init(GArrowDecimal256DataType *object) +{ +} + +static void +garrow_decimal256_data_type_class_init(GArrowDecimal256DataTypeClass *klass) +{ +} + +/** + * garrow_decimal256_data_type_max_precision: + * + * Returns: The max precision of 256-bit decimal data type. + * + * Since: 3.0.0 + */ +gint32 +garrow_decimal256_data_type_max_precision() +{ + return arrow::Decimal256Type::kMaxPrecision; +} + +/** + * garrow_decimal256_data_type_new: + * @precision: The precision of decimal data. + * @scale: The scale of decimal data. + * + * Returns: The newly created 256-bit decimal data type. 
+ * + * Since: 3.0.0 + */ +GArrowDecimal256DataType * +garrow_decimal256_data_type_new(gint32 precision, + gint32 scale) +{ + auto arrow_data_type = arrow::decimal256(precision, scale); + + auto data_type = + GARROW_DECIMAL256_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL256_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + + +typedef struct GArrowExtensionDataTypePrivate_ { + GArrowDataType *storage_data_type; +} GArrowExtensionDataTypePrivate; + +enum { + PROP_STORAGE_DATA_TYPE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExtensionDataType, + garrow_extension_data_type, + GARROW_TYPE_DATA_TYPE) + +#define GARROW_EXTENSION_DATA_TYPE_GET_PRIVATE(obj) \ + static_cast<GArrowExtensionDataTypePrivate *>( \ + garrow_extension_data_type_get_instance_private( \ + GARROW_EXTENSION_DATA_TYPE(obj))) + +static void +garrow_extension_data_type_dispose(GObject *object) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_GET_PRIVATE(object); + + if (priv->storage_data_type) { + g_object_unref(priv->storage_data_type); + priv->storage_data_type = NULL; + } + + G_OBJECT_CLASS(garrow_extension_data_type_parent_class)->dispose(object); +} + +static void +garrow_extension_data_type_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STORAGE_DATA_TYPE: + priv->storage_data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extension_data_type_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STORAGE_DATA_TYPE: + g_value_set_object(value, priv->storage_data_type); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void 
+garrow_extension_data_type_init(GArrowExtensionDataType *object) +{ +} + +static void +garrow_extension_data_type_class_init(GArrowExtensionDataTypeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_extension_data_type_dispose; + gobject_class->set_property = garrow_extension_data_type_set_property; + gobject_class->get_property = garrow_extension_data_type_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("storage-data-type", + "Storage data type", + "The underlying GArrowDataType", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_STORAGE_DATA_TYPE, spec); +} + +/** + * garrow_extension_data_type_get_extension_name: + * @data_type: A #GArrowExtensionDataType. + * + * Returns: The extension name of the type. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +garrow_extension_data_type_get_extension_name(GArrowExtensionDataType *data_type) +{ + auto arrow_data_type = + std::static_pointer_cast<arrow::ExtensionType>( + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type))); + return g_strdup(arrow_data_type->extension_name().c_str()); +} + +/** + * garrow_extension_data_type_wrap_array: + * @data_type: A #GArrowExtensionDataType. + * @storage: A #GArrowArray. + * + * Returns: (transfer full): The array that wraps underlying storage array. 
+ * + * Since: 3.0.0 + */ +GArrowExtensionArray * +garrow_extension_data_type_wrap_array(GArrowExtensionDataType *data_type, + GArrowArray *storage) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_storage = garrow_array_get_raw(storage); + auto arrow_extension_array = arrow::ExtensionType::WrapArray(arrow_data_type, + arrow_storage); + auto array = garrow_extension_array_new_raw(&arrow_extension_array, storage); + return GARROW_EXTENSION_ARRAY(array); +} + +/** + * garrow_extension_data_type_wrap_chunked_array: + * @data_type: A #GArrowExtensionDataType. + * @storage: A #GArrowChunkedArray. + * + * Returns: (transfer full): The chunked array that wraps underlying + * storage chunked array. + * + * Since: 3.0.0 + */ +GArrowChunkedArray * +garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type, + GArrowChunkedArray *storage) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_storage = garrow_chunked_array_get_raw(storage); + auto arrow_extension_chunked_array = + arrow::ExtensionType::WrapArray(arrow_data_type, + arrow_storage); + return garrow_chunked_array_new_raw(&arrow_extension_chunked_array); +} + + +/* Returns the raw Arrow data type of the "storage-data-type" property of data_type. */ +static std::shared_ptr<arrow::DataType> +garrow_extension_data_type_get_storage_data_type_raw( + GArrowExtensionDataType *data_type) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_GET_PRIVATE(data_type); + return garrow_data_type_get_raw(priv->storage_data_type); +} + +G_END_DECLS + +namespace garrow { + /* arrow::ExtensionType that forwards its virtual methods to the class functions of the GArrowExtensionDataType it holds a reference to. */ + GExtensionType::GExtensionType(GArrowExtensionDataType *garrow_data_type) : + arrow::ExtensionType( + garrow_extension_data_type_get_storage_data_type_raw(garrow_data_type)), + garrow_data_type_(garrow_data_type) { + g_object_ref(garrow_data_type_); + } + + GExtensionType::~GExtensionType() { + g_object_unref(garrow_data_type_); + } + + GArrowExtensionDataType *GExtensionType::garrow_data_type() const { + return garrow_data_type_; + } + + std::string
GExtensionType::extension_name() const { + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(garrow_data_type_); + auto c_name = klass->get_extension_name(garrow_data_type_); + std::string name(c_name); + g_free(c_name); + return name; + } + + /* Compares the extension names first and then delegates to the class's equal() implementation. */ + bool GExtensionType::ExtensionEquals(const arrow::ExtensionType& other) const { + if (extension_name() != other.extension_name()) { + return false; + } + /* A matching name doesn't prove that other is GLib-backed: check the dynamic type before reading its GObject. */ + auto g_other = dynamic_cast<const GExtensionType *>(&other); + if (!g_other) { + return false; + } + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(garrow_data_type_); + return klass->equal(garrow_data_type_, + g_other->garrow_data_type_); + } + + std::shared_ptr<arrow::Array> + GExtensionType::MakeArray(std::shared_ptr<arrow::ArrayData> data) const { + return std::make_shared<arrow::ExtensionArray>(data); + } + + /* Calls the class's deserialize() with a temporary GArrowDataType wrapper for storage_data_type and a GBytes view of serialized_data; a GError or a NULL result is reported as an arrow::Status. */ + arrow::Result<std::shared_ptr<arrow::DataType>> + GExtensionType::Deserialize(std::shared_ptr<arrow::DataType> storage_data_type, + const std::string& serialized_data) const { + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(garrow_data_type_); + auto garrow_storage_data_type = garrow_data_type_new_raw(&storage_data_type); + GBytes *g_serialized_data = g_bytes_new_static(serialized_data.data(), + serialized_data.size()); + GError *error = NULL; + auto garrow_deserialized_data_type = + klass->deserialize(garrow_data_type_, + garrow_storage_data_type, + g_serialized_data, + &error); + g_bytes_unref(g_serialized_data); + g_object_unref(garrow_storage_data_type); + if (error) { + return garrow_error_to_status(error, + arrow::StatusCode::SerializationError, + "[extension-type][deserialize]"); + } + /* An implementation may return NULL without setting an error: report it instead of dereferencing NULL below. */ + if (!garrow_deserialized_data_type) { + return arrow::Status::SerializationError( + "[extension-type][deserialize] deserialize() returned NULL without an error"); + } + + auto deserialized_data_type = + garrow_data_type_get_raw(garrow_deserialized_data_type); + g_object_unref(garrow_deserialized_data_type); + return deserialized_data_type; + } + + std::string + GExtensionType::Serialize() const { + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(garrow_data_type_); + auto g_bytes = klass->serialize(garrow_data_type_); + gsize raw_data_size =
0; + auto raw_data = g_bytes_get_data(g_bytes, &raw_data_size); + std::string data(static_cast<const char *>(raw_data), + raw_data_size); + g_bytes_unref(g_bytes); + return data; + } + + GType GExtensionType::array_gtype() const { + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(garrow_data_type_); + return klass->get_array_gtype(garrow_data_type_); + } +} + +G_BEGIN_DECLS + + +typedef struct GArrowExtensionDataTypeRegistryPrivate_ { + std::shared_ptr<arrow::ExtensionTypeRegistry> registry; +} GArrowExtensionDataTypeRegistryPrivate; + +enum { + PROP_REGISTRY = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExtensionDataTypeRegistry, + garrow_extension_data_type_registry, + G_TYPE_OBJECT) + +#define GARROW_EXTENSION_DATA_TYPE_REGISTRY_GET_PRIVATE(obj) \ + static_cast<GArrowExtensionDataTypeRegistryPrivate *>( \ + garrow_extension_data_type_registry_get_instance_private( \ + GARROW_EXTENSION_DATA_TYPE_REGISTRY(obj))) + +static void +garrow_extension_data_type_registry_finalize(GObject *object) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_REGISTRY_GET_PRIVATE(object); + + /* Pairs with the placement new in garrow_extension_data_type_registry_init(). */ + priv->registry.~shared_ptr(); + + G_OBJECT_CLASS(garrow_extension_data_type_registry_parent_class)->finalize(object); +} + +static void +garrow_extension_data_type_registry_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_REGISTRY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REGISTRY: + priv->registry = + *static_cast<std::shared_ptr<arrow::ExtensionTypeRegistry> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extension_data_type_registry_init(GArrowExtensionDataTypeRegistry *object) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_REGISTRY_GET_PRIVATE(object); + new(&priv->registry) std::shared_ptr<arrow::ExtensionTypeRegistry>; +} + +static void
+garrow_extension_data_type_registry_class_init(GArrowExtensionDataTypeRegistryClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_extension_data_type_registry_finalize; + gobject_class->set_property = garrow_extension_data_type_registry_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("registry", + "Registry", + "The raw std::shared<arrow::ExtensionTypeRegistry> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_REGISTRY, spec); +} + +/** + * garrow_extension_data_type_registry_default: + * + * Returns: (transfer full): The default global extension data type registry. + * + * Since: 3.0.0 + */ +GArrowExtensionDataTypeRegistry * +garrow_extension_data_type_registry_default(void) +{ + auto arrow_registry = arrow::ExtensionTypeRegistry::GetGlobalRegistry(); + return garrow_extension_data_type_registry_new_raw(&arrow_registry); +} + +/** + * garrow_extension_data_type_registry_register: + * @registry: A #GArrowExtensionDataTypeRegistry. + * @data_type: A #GArrowExtensionDataType to be registered. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Register the given @data_type to the @registry. + * + * Returns: %TRUE on success, %FALSE on error.
+ * + * Since: 3.0.0 + */ +gboolean +garrow_extension_data_type_registry_register( + GArrowExtensionDataTypeRegistry *registry, + GArrowExtensionDataType *data_type, + GError **error) +{ + const gchar *context = "[extension-data-type-registry][register]"; + auto klass = GARROW_EXTENSION_DATA_TYPE_GET_CLASS(data_type); + auto set_error = [&](const gchar *name) -> void { + auto klass_name = G_OBJECT_CLASS_NAME(klass); + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "%s %s::%s() isn't implemented", + context, + klass_name, + name); + }; + if (!klass->get_extension_name) { + set_error("get_extension_name"); + return FALSE; + } + if (!klass->equal) { + set_error("equal"); + return FALSE; + } + if (!klass->deserialize) { + set_error("deserialize"); + return FALSE; + } + if (!klass->serialize) { + set_error("serialize"); + return FALSE; + } + if (!klass->get_array_gtype) { + set_error("get_array_gtype"); + return FALSE; + } + + auto arrow_registry = garrow_extension_data_type_registry_get_raw(registry); + auto arrow_data_type = + std::static_pointer_cast<arrow::ExtensionType>( + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type))); + auto status = arrow_registry->RegisterType(arrow_data_type); + return garrow::check(error, status, context); +} + +/** + * garrow_extension_data_type_registry_unregister: + * @registry: A #GArrowExtensionDataTypeRegistry. + * @name: An extension data type name to be unregistered. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Unregister an extension data type that has the given @name from the + * @registry. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 3.0.0 + */ +gboolean +garrow_extension_data_type_registry_unregister( + GArrowExtensionDataTypeRegistry *registry, + const gchar *name, + GError **error) +{ + auto arrow_registry = garrow_extension_data_type_registry_get_raw(registry); + auto status = arrow_registry->UnregisterType(name); + return garrow::check(error, + status, + "[extension-data-type-registry][unregister]"); +} + +/** + * garrow_extension_data_type_registry_lookup: + * @registry: A #GArrowExtensionDataTypeRegistry. + * @name: An extension data type name to be looked up. + * + * Returns: (transfer full): A found #GArrowExtensionDataType on + * found, %NULL on not found. + * + * Since: 3.0.0 + */ +GArrowExtensionDataType * +garrow_extension_data_type_registry_lookup( + GArrowExtensionDataTypeRegistry *registry, + const gchar *name) +{ + auto arrow_registry = garrow_extension_data_type_registry_get_raw(registry); + auto arrow_extension_data_type = arrow_registry->GetType(name); + if (!arrow_extension_data_type) { + return NULL; + } + auto arrow_data_type = + std::static_pointer_cast<arrow::DataType>(arrow_extension_data_type); + auto data_type = garrow_data_type_new_raw(&arrow_data_type); + return GARROW_EXTENSION_DATA_TYPE(data_type); +} + + +G_END_DECLS + +GArrowDataType * +garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type) +{ + GType type; + GArrowDataType *data_type; + + switch ((*arrow_data_type)->id()) { + case arrow::Type::type::NA: + type = GARROW_TYPE_NULL_DATA_TYPE; + break; + case arrow::Type::type::BOOL: + type = GARROW_TYPE_BOOLEAN_DATA_TYPE; + break; + case arrow::Type::type::UINT8: + type = GARROW_TYPE_UINT8_DATA_TYPE; + break; + case arrow::Type::type::INT8: + type = GARROW_TYPE_INT8_DATA_TYPE; + break; + case arrow::Type::type::UINT16: + type = GARROW_TYPE_UINT16_DATA_TYPE; + break; + case arrow::Type::type::INT16: + type = GARROW_TYPE_INT16_DATA_TYPE; + break; + case arrow::Type::type::UINT32: + type = GARROW_TYPE_UINT32_DATA_TYPE; + break; + 
case arrow::Type::type::INT32: + type = GARROW_TYPE_INT32_DATA_TYPE; + break; + case arrow::Type::type::UINT64: + type = GARROW_TYPE_UINT64_DATA_TYPE; + break; + case arrow::Type::type::INT64: + type = GARROW_TYPE_INT64_DATA_TYPE; + break; + case arrow::Type::type::FLOAT: + type = GARROW_TYPE_FLOAT_DATA_TYPE; + break; + case arrow::Type::type::DOUBLE: + type = GARROW_TYPE_DOUBLE_DATA_TYPE; + break; + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_DATA_TYPE; + break; + case arrow::Type::type::LARGE_BINARY: + type = GARROW_TYPE_LARGE_BINARY_DATA_TYPE; + break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_DATA_TYPE; + break; + case arrow::Type::type::LARGE_STRING: + type = GARROW_TYPE_LARGE_STRING_DATA_TYPE; + break; + case arrow::Type::type::DATE32: + type = GARROW_TYPE_DATE32_DATA_TYPE; + break; + case arrow::Type::type::DATE64: + type = GARROW_TYPE_DATE64_DATA_TYPE; + break; + case arrow::Type::type::TIMESTAMP: + type = GARROW_TYPE_TIMESTAMP_DATA_TYPE; + break; + case arrow::Type::type::TIME32: + type = GARROW_TYPE_TIME32_DATA_TYPE; + break; + case arrow::Type::type::TIME64: + type = GARROW_TYPE_TIME64_DATA_TYPE; + break; + case arrow::Type::type::LIST: + type = GARROW_TYPE_LIST_DATA_TYPE; + break; + case arrow::Type::type::LARGE_LIST: + type = GARROW_TYPE_LARGE_LIST_DATA_TYPE; + break; + case arrow::Type::type::STRUCT: + type = GARROW_TYPE_STRUCT_DATA_TYPE; + break; + case arrow::Type::type::SPARSE_UNION: + type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE; + break; + case arrow::Type::type::DENSE_UNION: + type = GARROW_TYPE_DENSE_UNION_DATA_TYPE; + break; + case arrow::Type::type::DICTIONARY: + type = GARROW_TYPE_DICTIONARY_DATA_TYPE; + break; + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_DATA_TYPE; + break; + case arrow::Type::type::DECIMAL128: + type = GARROW_TYPE_DECIMAL128_DATA_TYPE; + break; + case 
arrow::Type::type::DECIMAL256: + type = GARROW_TYPE_DECIMAL256_DATA_TYPE; + break; + case arrow::Type::type::EXTENSION: + { + auto g_extension_data_type = + std::static_pointer_cast<garrow::GExtensionType>(*arrow_data_type); + if (g_extension_data_type) { + auto garrow_data_type = g_extension_data_type->garrow_data_type(); + g_object_ref(garrow_data_type); + return GARROW_DATA_TYPE(garrow_data_type); + } + } + type = GARROW_TYPE_EXTENSION_DATA_TYPE; + break; + default: + type = GARROW_TYPE_DATA_TYPE; + break; + } + data_type = GARROW_DATA_TYPE(g_object_new(type, + "data-type", arrow_data_type, + NULL)); + return data_type; +} + +std::shared_ptr<arrow::DataType> +garrow_data_type_get_raw(GArrowDataType *data_type) +{ + auto priv = GARROW_DATA_TYPE_GET_PRIVATE(data_type); + if (!priv->data_type && + g_type_is_a(G_OBJECT_TYPE(data_type), GARROW_TYPE_EXTENSION_DATA_TYPE)) { + priv->data_type = std::make_shared<garrow::GExtensionType>( + GARROW_EXTENSION_DATA_TYPE(data_type)); + } + return priv->data_type; +} + +GArrowExtensionDataTypeRegistry * +garrow_extension_data_type_registry_new_raw( + std::shared_ptr<arrow::ExtensionTypeRegistry> *arrow_registry) +{ + auto registry = g_object_new(GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY, + "registry", arrow_registry, + NULL); + return GARROW_EXTENSION_DATA_TYPE_REGISTRY(registry); +} + +std::shared_ptr<arrow::ExtensionTypeRegistry> +garrow_extension_data_type_registry_get_raw( + GArrowExtensionDataTypeRegistry *registry) +{ + auto priv = GARROW_EXTENSION_DATA_TYPE_REGISTRY_GET_PRIVATE(registry); + return priv->registry; +} diff --git a/src/arrow/c_glib/arrow-glib/basic-data-type.h b/src/arrow/c_glib/arrow-glib/basic-data-type.h new file mode 100644 index 000000000..f56a8b2d9 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-data-type.h @@ -0,0 +1,596 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/decimal.h> +#include <arrow-glib/type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +/* GArrowDataType: GObject-derived root of the data type classes declared in this header. */ +#define GARROW_TYPE_DATA_TYPE (garrow_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDataType, + garrow_data_type, + GARROW, + DATA_TYPE, + GObject) +struct _GArrowDataTypeClass +{ + GObjectClass parent_class; +}; + +/* NOTE(review): c_abi_schema is an untyped gpointer; presumably a struct ArrowSchema * of the Arrow C data interface -- confirm against the implementation. */ +GARROW_AVAILABLE_IN_6_0 +GArrowDataType * +garrow_data_type_import(gpointer c_abi_schema, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +gpointer +garrow_data_type_export(GArrowDataType *data_type, + GError **error); + +gboolean garrow_data_type_equal (GArrowDataType *data_type, + GArrowDataType *other_data_type); +gchar *garrow_data_type_to_string (GArrowDataType *data_type); +GArrowType garrow_data_type_get_id (GArrowDataType *data_type); +GARROW_AVAILABLE_IN_3_0 +gchar * +garrow_data_type_get_name(GArrowDataType *data_type); + + +/* GArrowFixedWidthDataType: derives from GArrowDataType and adds a bit width query. */ +#define GARROW_TYPE_FIXED_WIDTH_DATA_TYPE (garrow_fixed_width_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedWidthDataType, + garrow_fixed_width_data_type, + GARROW, + FIXED_WIDTH_DATA_TYPE, + GArrowDataType) +struct _GArrowFixedWidthDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +gint garrow_fixed_width_data_type_get_bit_width(GArrowFixedWidthDataType *data_type); 
+/* TODO: Not declared yet; kept as a reminder of the planned API: +GList *garrow_fixed_width_data_type_get_buffer_layout(GArrowFixedWidthDataType *data_type); +*/ + + +#define GARROW_TYPE_NULL_DATA_TYPE \ + (garrow_null_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNullDataType, + garrow_null_data_type, + GARROW, + NULL_DATA_TYPE, + GArrowDataType) +struct _GArrowNullDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowNullDataType *garrow_null_data_type_new (void); + + +#define GARROW_TYPE_BOOLEAN_DATA_TYPE (garrow_boolean_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBooleanDataType, + garrow_boolean_data_type, + GARROW, + BOOLEAN_DATA_TYPE, + GArrowFixedWidthDataType) +struct _GArrowBooleanDataTypeClass +{ + GArrowFixedWidthDataTypeClass parent_class; +}; + +GArrowBooleanDataType *garrow_boolean_data_type_new (void); + + +/* GArrowNumericDataType: common parent of the integer and floating point classes below. */ +#define GARROW_TYPE_NUMERIC_DATA_TYPE (garrow_numeric_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNumericDataType, + garrow_numeric_data_type, + GARROW, + NUMERIC_DATA_TYPE, + GArrowFixedWidthDataType) +struct _GArrowNumericDataTypeClass +{ + GArrowFixedWidthDataTypeClass parent_class; +}; + + +/* GArrowIntegerDataType: common parent of the Int8..UInt64 classes below. */ +#define GARROW_TYPE_INTEGER_DATA_TYPE (garrow_integer_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowIntegerDataType, + garrow_integer_data_type, + GARROW, + INTEGER_DATA_TYPE, + GArrowNumericDataType) +struct _GArrowIntegerDataTypeClass +{ + GArrowNumericDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +gboolean garrow_integer_data_type_is_signed(GArrowIntegerDataType *data_type); + +#define GARROW_TYPE_INT8_DATA_TYPE (garrow_int8_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt8DataType, + garrow_int8_data_type, + GARROW, + INT8_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowInt8DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowInt8DataType *garrow_int8_data_type_new (void); + + +#define GARROW_TYPE_UINT8_DATA_TYPE (garrow_uint8_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt8DataType, + 
garrow_uint8_data_type, + GARROW, + UINT8_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowUInt8DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowUInt8DataType *garrow_uint8_data_type_new (void); + + +#define GARROW_TYPE_INT16_DATA_TYPE (garrow_int16_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt16DataType, + garrow_int16_data_type, + GARROW, + INT16_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowInt16DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowInt16DataType *garrow_int16_data_type_new (void); + + +#define GARROW_TYPE_UINT16_DATA_TYPE (garrow_uint16_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt16DataType, + garrow_uint16_data_type, + GARROW, + UINT16_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowUInt16DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowUInt16DataType *garrow_uint16_data_type_new (void); + + +#define GARROW_TYPE_INT32_DATA_TYPE (garrow_int32_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt32DataType, + garrow_int32_data_type, + GARROW, + INT32_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowInt32DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowInt32DataType *garrow_int32_data_type_new (void); + + +#define GARROW_TYPE_UINT32_DATA_TYPE (garrow_uint32_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt32DataType, + garrow_uint32_data_type, + GARROW, + UINT32_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowUInt32DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowUInt32DataType *garrow_uint32_data_type_new (void); + + +#define GARROW_TYPE_INT64_DATA_TYPE (garrow_int64_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt64DataType, + garrow_int64_data_type, + GARROW, + INT64_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowInt64DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowInt64DataType *garrow_int64_data_type_new (void); + + +#define 
GARROW_TYPE_UINT64_DATA_TYPE (garrow_uint64_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt64DataType, + garrow_uint64_data_type, + GARROW, + UINT64_DATA_TYPE, + GArrowIntegerDataType) +struct _GArrowUInt64DataTypeClass +{ + GArrowIntegerDataTypeClass parent_class; +}; + +GArrowUInt64DataType *garrow_uint64_data_type_new (void); + + +/* GArrowFloatingPointDataType: common parent of the Float and Double classes below. */ +#define GARROW_TYPE_FLOATING_POINT_DATA_TYPE \ + (garrow_floating_point_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFloatingPointDataType, + garrow_floating_point_data_type, + GARROW, + FLOATING_POINT_DATA_TYPE, + GArrowNumericDataType) +struct _GArrowFloatingPointDataTypeClass +{ + GArrowNumericDataTypeClass parent_class; +}; + + +#define GARROW_TYPE_FLOAT_DATA_TYPE (garrow_float_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFloatDataType, + garrow_float_data_type, + GARROW, + FLOAT_DATA_TYPE, + GArrowFloatingPointDataType) +struct _GArrowFloatDataTypeClass +{ + GArrowFloatingPointDataTypeClass parent_class; +}; + +GArrowFloatDataType *garrow_float_data_type_new (void); + + +#define GARROW_TYPE_DOUBLE_DATA_TYPE (garrow_double_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDoubleDataType, + garrow_double_data_type, + GARROW, + DOUBLE_DATA_TYPE, + GArrowFloatingPointDataType) +struct _GArrowDoubleDataTypeClass +{ + GArrowFloatingPointDataTypeClass parent_class; +}; + +GArrowDoubleDataType *garrow_double_data_type_new (void); + + +#define GARROW_TYPE_BINARY_DATA_TYPE (garrow_binary_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryDataType, + garrow_binary_data_type, + GARROW, + BINARY_DATA_TYPE, + GArrowDataType) +struct _GArrowBinaryDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowBinaryDataType *garrow_binary_data_type_new (void); + + +#define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE (garrow_fixed_size_binary_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW, + FIXED_SIZE_BINARY_DATA_TYPE, + 
GArrowDataType) +/* NOTE(review): the declaration above names GArrowDataType as the parent, while the class struct below embeds GArrowFixedWidthDataTypeClass. Confirm which parent the type is registered with in the .cpp and make the two agree. */ +struct _GArrowFixedSizeBinaryDataTypeClass +{ + GArrowFixedWidthDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_12 +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width); +GARROW_AVAILABLE_IN_0_12 +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type); + + +#define GARROW_TYPE_LARGE_BINARY_DATA_TYPE (garrow_large_binary_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryDataType, + garrow_large_binary_data_type, + GARROW, + LARGE_BINARY_DATA_TYPE, + GArrowDataType) +struct _GArrowLargeBinaryDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowLargeBinaryDataType *garrow_large_binary_data_type_new(void); + + +/* GArrowStringDataType derives from GArrowBinaryDataType. */ +#define GARROW_TYPE_STRING_DATA_TYPE (garrow_string_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStringDataType, + garrow_string_data_type, + GARROW, + STRING_DATA_TYPE, + GArrowBinaryDataType) +struct _GArrowStringDataTypeClass +{ + GArrowBinaryDataTypeClass parent_class; +}; + +GArrowStringDataType *garrow_string_data_type_new (void); + + +/* GArrowLargeStringDataType derives from GArrowLargeBinaryDataType. */ +#define GARROW_TYPE_LARGE_STRING_DATA_TYPE (garrow_large_string_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringDataType, + garrow_large_string_data_type, + GARROW, + LARGE_STRING_DATA_TYPE, + GArrowLargeBinaryDataType) +struct _GArrowLargeStringDataTypeClass +{ + GArrowLargeBinaryDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowLargeStringDataType *garrow_large_string_data_type_new(void); + + +#define GARROW_TYPE_DATE32_DATA_TYPE (garrow_date32_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate32DataType, + garrow_date32_data_type, + GARROW, + DATE32_DATA_TYPE, + GArrowDataType) +struct _GArrowDate32DataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowDate32DataType *garrow_date32_data_type_new (void); + + +#define GARROW_TYPE_DATE64_DATA_TYPE (garrow_date64_data_type_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowDate64DataType, + garrow_date64_data_type, + GARROW, + DATE64_DATA_TYPE, + GArrowDataType) +struct _GArrowDate64DataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowDate64DataType *garrow_date64_data_type_new (void); + + +#define GARROW_TYPE_TIMESTAMP_DATA_TYPE (garrow_timestamp_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimestampDataType, + garrow_timestamp_data_type, + GARROW, + TIMESTAMP_DATA_TYPE, + GArrowDataType) +struct _GArrowTimestampDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowTimestampDataType *garrow_timestamp_data_type_new (GArrowTimeUnit unit); +GArrowTimeUnit +garrow_timestamp_data_type_get_unit (GArrowTimestampDataType *timestamp_data_type); + + +/* GArrowTimeDataType: common parent of the Time32 and Time64 classes below; it has no constructor of its own in this header. */ +#define GARROW_TYPE_TIME_DATA_TYPE (garrow_time_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimeDataType, + garrow_time_data_type, + GARROW, + TIME_DATA_TYPE, + GArrowDataType) +struct _GArrowTimeDataTypeClass +{ + GArrowDataTypeClass parent_class; +}; + +GArrowTimeUnit garrow_time_data_type_get_unit (GArrowTimeDataType *time_data_type); + + +#define GARROW_TYPE_TIME32_DATA_TYPE (garrow_time32_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime32DataType, + garrow_time32_data_type, + GARROW, + TIME32_DATA_TYPE, + GArrowTimeDataType) +struct _GArrowTime32DataTypeClass +{ + GArrowTimeDataTypeClass parent_class; +}; + +GArrowTime32DataType *garrow_time32_data_type_new (GArrowTimeUnit unit, + GError **error); + + +#define GARROW_TYPE_TIME64_DATA_TYPE (garrow_time64_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime64DataType, + garrow_time64_data_type, + GARROW, + TIME64_DATA_TYPE, + GArrowTimeDataType) +struct _GArrowTime64DataTypeClass +{ + GArrowTimeDataTypeClass parent_class; +}; + +GArrowTime64DataType *garrow_time64_data_type_new (GArrowTimeUnit unit, + GError **error); + + +/* GArrowDecimalDataType: common parent of the Decimal128 and Decimal256 classes below. */ +#define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType, + 
garrow_decimal_data_type, + GARROW, + DECIMAL_DATA_TYPE, + GArrowFixedSizeBinaryDataType) +struct _GArrowDecimalDataTypeClass +{ + GArrowFixedSizeBinaryDataTypeClass parent_class; +}; + +/* The generic constructor is deprecated in favor of garrow_decimal128_data_type_new() and is hidden when GARROW_DISABLE_DEPRECATED is defined. */ +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_data_type_new) +GArrowDecimalDataType * +garrow_decimal_data_type_new(gint32 precision, gint32 scale); +#endif +gint32 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type); +gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type); + + +#define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType, + garrow_decimal128_data_type, + GARROW, + DECIMAL128_DATA_TYPE, + GArrowDecimalDataType) +struct _GArrowDecimal128DataTypeClass +{ + GArrowDecimalDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +gint32 +garrow_decimal128_data_type_max_precision(); + +GARROW_AVAILABLE_IN_0_12 +GArrowDecimal128DataType * +garrow_decimal128_data_type_new(gint32 precision, gint32 scale); + + +#define GARROW_TYPE_DECIMAL256_DATA_TYPE (garrow_decimal256_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256DataType, + garrow_decimal256_data_type, + GARROW, + DECIMAL256_DATA_TYPE, + GArrowDecimalDataType) +struct _GArrowDecimal256DataTypeClass +{ + GArrowDecimalDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +gint32 +garrow_decimal256_data_type_max_precision(); + +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256DataType * +garrow_decimal256_data_type_new(gint32 precision, gint32 scale); + +#define GARROW_TYPE_EXTENSION_DATA_TYPE (garrow_extension_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataType, + garrow_extension_data_type, + GARROW, + EXTENSION_DATA_TYPE, + GArrowDataType) +/** + * GArrowExtensionDataTypeClass: + * @get_extension_name: It must return the name of this extension data type. 
+ * @equal: It must return %TRUE only when both data types are equal, %FALSE + * otherwise. + * @deserialize: It must return a deserialized #GArrowDataType built from the + * given `storage_data_type` and `serialized_data`. + * @serialize: It must return the serialized data of this extension data type + * so that it can be deserialized later. + * @get_array_gtype: It must return the #GType of the corresponding extension + * array class. + * + * Since: 3.0.0 + */ +struct _GArrowExtensionDataTypeClass +{ + GArrowDataTypeClass parent_class; + + gchar *(*get_extension_name)(GArrowExtensionDataType *data_type); + gboolean (*equal)(GArrowExtensionDataType *data_type, + GArrowExtensionDataType *other_data_type); + GArrowDataType *(*deserialize)(GArrowExtensionDataType *data_type, + GArrowDataType *storage_data_type, + GBytes *serialized_data, + GError **error); + GBytes *(*serialize)(GArrowExtensionDataType *data_type); + GType (*get_array_gtype)(GArrowExtensionDataType *data_type); +}; + +GARROW_AVAILABLE_IN_3_0 +gchar * +garrow_extension_data_type_get_extension_name(GArrowExtensionDataType *data_type); + +/* Forward declarations: only pointers to the array types are needed by the prototypes below. */ +typedef struct _GArrowArray GArrowArray; +typedef struct _GArrowExtensionArray GArrowExtensionArray; + +GARROW_AVAILABLE_IN_3_0 +GArrowExtensionArray * +garrow_extension_data_type_wrap_array(GArrowExtensionDataType *data_type, + GArrowArray *storage); + +typedef struct _GArrowChunkedArray GArrowChunkedArray; + +GARROW_AVAILABLE_IN_3_0 +GArrowChunkedArray * +garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type, + GArrowChunkedArray *storage); + + +/* GArrowExtensionDataTypeRegistry: GObject-derived class for registering, unregistering and looking up extension data types (see the prototypes that follow). */ +#define GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY \ + (garrow_extension_data_type_registry_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataTypeRegistry, + garrow_extension_data_type_registry, + GARROW, + EXTENSION_DATA_TYPE_REGISTRY, + GObject) +struct _GArrowExtensionDataTypeRegistryClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowExtensionDataTypeRegistry * +garrow_extension_data_type_registry_default(void); + 
+/* Adds @data_type to @registry; failures are reported through @error. */ +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_extension_data_type_registry_register( + GArrowExtensionDataTypeRegistry *registry, + GArrowExtensionDataType *data_type, + GError **error); +/* Removes the extension data type called @name from @registry; failures are reported through @error. */ +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_extension_data_type_registry_unregister( + GArrowExtensionDataTypeRegistry *registry, + const gchar *name, + GError **error); +/* Looks up an extension data type by @name; takes no GError. */ +GARROW_AVAILABLE_IN_3_0 +GArrowExtensionDataType * +garrow_extension_data_type_registry_lookup( + GArrowExtensionDataTypeRegistry *registry, + const gchar *name); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/basic-data-type.hpp b/src/arrow/c_glib/arrow-glib/basic-data-type.hpp new file mode 100644 index 000000000..ff1fe5e28 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/basic-data-type.hpp @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/basic-data-type.h> + +/* + * Conversions between the GLib wrappers and the raw Arrow C++ objects. + * The *_new_raw() functions take a pointer to a std::shared_ptr; the + * *_get_raw() functions return the std::shared_ptr by value. + */ +GArrowDataType * +garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type); +std::shared_ptr<arrow::DataType> +garrow_data_type_get_raw(GArrowDataType *data_type); + +GArrowExtensionDataTypeRegistry * +garrow_extension_data_type_registry_new_raw( + std::shared_ptr<arrow::ExtensionTypeRegistry> *arrow_registry); +std::shared_ptr<arrow::ExtensionTypeRegistry> +garrow_extension_data_type_registry_get_raw( + GArrowExtensionDataTypeRegistry *registry); + +namespace garrow { + /* + * arrow::ExtensionType subclass bound to a GArrowExtensionDataType, + * which it keeps in garrow_data_type_. + * NOTE(review): the overrides presumably delegate to the virtual + * functions of GArrowExtensionDataTypeClass -- confirm in the .cpp. + */ + class GExtensionType : public arrow::ExtensionType { + public: + explicit GExtensionType(GArrowExtensionDataType *garrow_data_type); + ~GExtensionType(); + + GArrowExtensionDataType * + garrow_data_type() const; + + GType + array_gtype() const; + + std::string extension_name() const override; + + bool ExtensionEquals(const arrow::ExtensionType& other) const override; + + std::shared_ptr<arrow::Array> + MakeArray(std::shared_ptr<arrow::ArrayData> data) const override; + + arrow::Result<std::shared_ptr<arrow::DataType>> + Deserialize(std::shared_ptr<arrow::DataType> storage_data_type, + const std::string& serialized_data) const override; + + std::string + Serialize() const override; + + private: + GArrowExtensionDataType *garrow_data_type_; + }; +} diff --git a/src/arrow/c_glib/arrow-glib/buffer.cpp b/src/arrow/c_glib/arrow-glib/buffer.cpp new file mode 100644 index 000000000..58f47518c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/buffer.cpp @@ -0,0 +1,695 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/error.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: buffer + * @section_id: buffer-classes + * @title: Buffer classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowBuffer is a class for keeping data. Other classes such as + * #GArrowArray and #GArrowTensor can use data in buffer. + * + * #GArrowBuffer is immutable. + * + * #GArrowMutableBuffer is mutable. + * + * #GArrowResizableBuffer is mutable and resizable. + */ + +/* + * Private data of GArrowBuffer: the wrapped Arrow buffer plus two + * optional references (a GBytes payload and a parent GArrowBuffer) that + * are released in dispose. + */ +typedef struct GArrowBufferPrivate_ { + std::shared_ptr<arrow::Buffer> buffer; + GBytes *data; + GArrowBuffer *parent; +} GArrowBufferPrivate; + +enum { + PROP_BUFFER = 1, + PROP_DATA, + PROP_PARENT, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowBuffer, garrow_buffer, G_TYPE_OBJECT) + +#define GARROW_BUFFER_GET_PRIVATE(obj) \ + static_cast<GArrowBufferPrivate *>( \ + garrow_buffer_get_instance_private( \ + GARROW_BUFFER(obj))) + +/* Releases the GBytes and parent references; each pointer is reset so a repeated dispose is harmless. */ +static void +garrow_buffer_dispose(GObject *object) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + + if (priv->data) { + g_bytes_unref(priv->data); + priv->data = nullptr; + } + + if (priv->parent) { + g_object_unref(priv->parent); + priv->parent = nullptr; + } + + G_OBJECT_CLASS(garrow_buffer_parent_class)->dispose(object); +} + +/* Destroys the shared_ptr explicitly because garrow_buffer_init() built it with placement new. */ +static void +garrow_buffer_finalize(GObject *object) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + + priv->buffer.~shared_ptr(); + + G_OBJECT_CLASS(garrow_buffer_parent_class)->finalize(object); +} + +/* Stores the properties; all three are installed as construct-only in garrow_buffer_class_init(). */ +static void +garrow_buffer_set_property(GObject *object, + guint prop_id, + const GValue *value, + 
GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + /* The pointer property carries a std::shared_ptr<arrow::Buffer> *; the shared_ptr it points to is copied. */ + priv->buffer = + *static_cast<std::shared_ptr<arrow::Buffer> *>(g_value_get_pointer(value)); + break; + case PROP_DATA: + priv->data = static_cast<GBytes *>(g_value_dup_boxed(value)); + break; + case PROP_PARENT: + priv->parent = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Only "parent" is readable; "buffer" and "data" are installed as write-only. */ +static void +garrow_buffer_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_PARENT: + g_value_set_object(value, priv->parent); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Constructs the shared_ptr member in place inside the GObject private data. */ +static void +garrow_buffer_init(GArrowBuffer *object) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + new(&priv->buffer) std::shared_ptr<arrow::Buffer>; +} + +/* Hooks up the GObject virtual functions and installs the "buffer", "data" and "parent" properties. */ +static void +garrow_buffer_class_init(GArrowBufferClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_buffer_dispose; + gobject_class->finalize = garrow_buffer_finalize; + gobject_class->set_property = garrow_buffer_set_property; + gobject_class->get_property = garrow_buffer_get_property; + + spec = g_param_spec_pointer("buffer", + "Buffer", + "The raw std::shared_ptr<arrow::Buffer> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER, spec); + + spec = g_param_spec_boxed("data", + "Data", + "The raw data passed as GBytes *", + G_TYPE_BYTES, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA, spec); + + spec = g_param_spec_object("parent", + "Parent", + "The parent GArrowBuffer *", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | 
+ G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_PARENT, spec); +} + +/** + * garrow_buffer_new: + * @data: (array length=size): Data for the buffer. + * They aren't owned by the new buffer. + * You must not free the data while the new buffer is alive. + * @size: The number of bytes of the data. + * + * Returns: A newly created #GArrowBuffer. + * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_buffer_new(const guint8 *data, gint64 size) +{ + auto arrow_buffer = std::make_shared<arrow::Buffer>(data, size); + return garrow_buffer_new_raw(&arrow_buffer); +} + +/** + * garrow_buffer_new_bytes: + * @data: Data for the buffer. The same #GBytes is also handed to the + * new buffer together with the Arrow buffer that points into it. + * + * Returns: A newly created #GArrowBuffer. + * + * Since: 0.9.0 + */ +GArrowBuffer * +garrow_buffer_new_bytes(GBytes *data) +{ + size_t data_size; + auto raw_data = g_bytes_get_data(data, &data_size); + auto arrow_buffer = + std::make_shared<arrow::Buffer>(static_cast<const uint8_t *>(raw_data), + data_size); + return garrow_buffer_new_raw_bytes(&arrow_buffer, data); +} + +/** + * garrow_buffer_equal: + * @buffer: A #GArrowBuffer. + * @other_buffer: A #GArrowBuffer to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_buffer_equal(GArrowBuffer *buffer, GArrowBuffer *other_buffer) +{ + const auto arrow_buffer = garrow_buffer_get_raw(buffer); + const auto arrow_other_buffer = garrow_buffer_get_raw(other_buffer); + return arrow_buffer->Equals(*arrow_other_buffer); +} + +/** + * garrow_buffer_equal_n_bytes: + * @buffer: A #GArrowBuffer. + * @other_buffer: A #GArrowBuffer to be compared. + * @n_bytes: The number of first bytes to be compared. + * + * Returns: %TRUE if both of them have the same data in the first + * `n_bytes`, %FALSE otherwise. 
+ * + * Since: 0.4.0 + */ +gboolean +garrow_buffer_equal_n_bytes(GArrowBuffer *buffer, + GArrowBuffer *other_buffer, + gint64 n_bytes) +{ + const auto arrow_buffer = garrow_buffer_get_raw(buffer); + const auto arrow_other_buffer = garrow_buffer_get_raw(other_buffer); + return arrow_buffer->Equals(*arrow_other_buffer, n_bytes); +} + +/** + * garrow_buffer_is_mutable: + * @buffer: A #GArrowBuffer. + * + * Returns: %TRUE if the buffer is mutable, %FALSE otherwise. + * + * Since: 0.3.0 + */ +gboolean +garrow_buffer_is_mutable(GArrowBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + return arrow_buffer->is_mutable(); +} + +/** + * garrow_buffer_get_capacity: + * @buffer: A #GArrowBuffer. + * + * Returns: The number of bytes that were allocated for the buffer in + * total. + * + * Since: 0.3.0 + */ +gint64 +garrow_buffer_get_capacity(GArrowBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + return arrow_buffer->capacity(); +} + +/** + * garrow_buffer_get_data: + * @buffer: A #GArrowBuffer. + * + * Returns: (transfer full): The data of the buffer as a #GBytes. + * Release the returned #GBytes with g_bytes_unref(). The bytes it + * refers to stay owned by the buffer: do not modify them, and do not + * use the #GBytes after the buffer is gone. + * + * Since: 0.3.0 + */ +GBytes * +garrow_buffer_get_data(GArrowBuffer *buffer) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + /* A buffer that was given a GBytes at construction hands out a new reference to it. */ + if (priv->data) { + g_bytes_ref(priv->data); + return priv->data; + } + + /* Otherwise wrap the Arrow memory without copying it; g_bytes_new_static() never frees the data. */ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + auto data = g_bytes_new_static(arrow_buffer->data(), + arrow_buffer->size()); + return data; +} + +/** + * garrow_buffer_get_mutable_data: + * @buffer: A #GArrowBuffer. + * + * Returns: (transfer full) (nullable): The data of the buffer. If the + * buffer is immutable, it returns %NULL. The data is owned by the + * buffer. You should not free the data. 
+ * + * Since: 0.3.0 + */ +GBytes * +garrow_buffer_get_mutable_data(GArrowBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + /* Immutable buffers expose no writable view. */ + if (!arrow_buffer->is_mutable()) { + return NULL; + } + + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + /* Prefer the GBytes given at construction; the caller receives a new reference. */ + if (priv->data) { + g_bytes_ref(priv->data); + return priv->data; + } + + /* Wrap the Arrow memory without copying it; g_bytes_new_static() never frees the data. */ + return g_bytes_new_static(arrow_buffer->mutable_data(), + arrow_buffer->size()); +} + +/** + * garrow_buffer_get_size: + * @buffer: A #GArrowBuffer. + * + * Returns: The number of bytes that might have valid data. + * + * Since: 0.3.0 + */ +gint64 +garrow_buffer_get_size(GArrowBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + return arrow_buffer->size(); +} + +/** + * garrow_buffer_get_parent: + * @buffer: A #GArrowBuffer. + * + * Returns: (nullable) (transfer full): + * The parent #GArrowBuffer or %NULL. + * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_buffer_get_parent(GArrowBuffer *buffer) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + /* A parent stored at construction is returned as-is with a new reference. */ + if (priv->parent) { + g_object_ref(priv->parent); + return priv->parent; + } + + /* Otherwise ask Arrow for a parent; each call then creates a new wrapper object. */ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + auto arrow_parent_buffer = arrow_buffer->parent(); + if (arrow_parent_buffer) { + return garrow_buffer_new_raw(&arrow_parent_buffer); + } else { + return NULL; + } +} + +/** + * garrow_buffer_copy: + * @buffer: A #GArrowBuffer. + * @start: An offset of data to be copied in byte. + * @size: The number of bytes to be copied from the start. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly copied #GArrowBuffer on success, %NULL on error. 
+ * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_buffer_copy(GArrowBuffer *buffer, + gint64 start, + gint64 size, + GError **error) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + auto maybe_copied_buffer = arrow_buffer->CopySlice(start, size); + /* garrow::check() reports a failed result through @error and returns false. */ + if (garrow::check(error, maybe_copied_buffer, "[buffer][copy]")) { + return garrow_buffer_new_raw(&(*maybe_copied_buffer)); + } else { + return NULL; + } +} + +/** + * garrow_buffer_slice: + * @buffer: A #GArrowBuffer. + * @offset: An offset in the buffer data in byte. + * @size: The number of bytes of the sliced data. + * + * Returns: (transfer full): A newly created #GArrowBuffer that shares + * data of the base #GArrowBuffer. The created #GArrowBuffer has data + * start with offset from the base buffer data and are the specified + * bytes size. + * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, gint64 size) +{ + /* NOTE(review): @offset and @size are passed to arrow::Buffer unchecked here; confirm whether callers are expected to validate them. */ + auto arrow_parent_buffer = garrow_buffer_get_raw(buffer); + auto arrow_buffer = std::make_shared<arrow::Buffer>(arrow_parent_buffer, + offset, + size); + /* The slice is created with @buffer recorded as its parent. */ + return garrow_buffer_new_raw_parent(&arrow_buffer, buffer); +} + + +G_DEFINE_TYPE(GArrowMutableBuffer, + garrow_mutable_buffer, + GARROW_TYPE_BUFFER) + +/* No instance state of its own: everything lives in GArrowBuffer. */ +static void +garrow_mutable_buffer_init(GArrowMutableBuffer *object) +{ +} + +static void +garrow_mutable_buffer_class_init(GArrowMutableBufferClass *klass) +{ +} + +/** + * garrow_mutable_buffer_new: + * @data: (array length=size): Data for the buffer. + * They aren't owned by the new buffer. + * You must not free the data while the new buffer is alive. + * @size: The number of bytes of the data. + * + * Returns: A newly created #GArrowMutableBuffer. 
+ * + * Since: 0.3.0 + */ +GArrowMutableBuffer * +garrow_mutable_buffer_new(guint8 *data, gint64 size) +{ + auto arrow_buffer = std::make_shared<arrow::MutableBuffer>(data, size); + return garrow_mutable_buffer_new_raw(&arrow_buffer); +} + +/** + * garrow_mutable_buffer_new_bytes: + * @data: Data for the buffer. + * + * Returns: A newly created #GArrowMutableBuffer. + * + * Since: 0.9.0 + */ +GArrowMutableBuffer * +garrow_mutable_buffer_new_bytes(GBytes *data) +{ + size_t data_size; + auto raw_data = g_bytes_get_data(data, &data_size); + auto mutable_raw_data = const_cast<gpointer>(raw_data); + auto arrow_buffer = + std::make_shared<arrow::MutableBuffer>(static_cast<uint8_t *>(mutable_raw_data), + data_size); + return garrow_mutable_buffer_new_raw_bytes(&arrow_buffer, data); +} + +/** + * garrow_mutable_buffer_slice: + * @buffer: A #GArrowMutableBuffer. + * @offset: An offset in the buffer data in byte. + * @size: The number of bytes of the sliced data. + * + * Returns: (transfer full): A newly created #GArrowMutableBuffer that + * shares data of the base #GArrowMutableBuffer. The created + * #GArrowMutableBuffer has data start with offset from the base + * buffer data and are the specified bytes size. + * + * Since: 0.3.0 + */ +GArrowMutableBuffer * +garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, + gint64 offset, + gint64 size) +{ + auto arrow_parent_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + auto arrow_mutable_buffer = + std::make_shared<arrow::MutableBuffer>(arrow_parent_buffer, + offset, + size); + auto arrow_buffer = + std::static_pointer_cast<arrow::Buffer>(arrow_mutable_buffer); + auto sliced_buffer = garrow_buffer_new_raw_parent(&arrow_buffer, + GARROW_BUFFER(buffer)); + return GARROW_MUTABLE_BUFFER(sliced_buffer); +} + +/** + * garrow_mutable_buffer_set_data: + * @buffer: A #GArrowMutableBuffer. + * @offset: A write offset in the buffer data in byte. + * @data: (array length=size): The data to be written. 
+ * @size: The number of bytes of the data to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer, + gint64 offset, + const guint8 *data, + gint64 size, + GError **error) +{ + const gchar *context = "[mutable-buffer][set-data]"; + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + if (offset + size > arrow_buffer->size()) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: Data is too large: " + "<(%" G_GINT64_FORMAT " + %" G_GINT64_FORMAT ") > " + "(%" G_GINT64_FORMAT ")>", + context, + offset, + size, + arrow_buffer->size()); + return FALSE; + } + memcpy(arrow_buffer->mutable_data() + offset, data, size); + return TRUE; +} + + +G_DEFINE_TYPE(GArrowResizableBuffer, + garrow_resizable_buffer, + GARROW_TYPE_MUTABLE_BUFFER) + +static void +garrow_resizable_buffer_init(GArrowResizableBuffer *object) +{ +} + +static void +garrow_resizable_buffer_class_init(GArrowResizableBufferClass *klass) +{ +} + +/** + * garrow_resizable_buffer_new: + * @initial_size: The initial buffer size in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowResizableBuffer. + * + * Since: 0.10.0 + */ +GArrowResizableBuffer * +garrow_resizable_buffer_new(gint64 initial_size, + GError **error) +{ + auto maybe_buffer = arrow::AllocateResizableBuffer(initial_size); + if (garrow::check(error, maybe_buffer, "[resizable-buffer][new]")) { + auto arrow_buffer = std::shared_ptr<arrow::ResizableBuffer>( + *std::move(maybe_buffer)); + return garrow_resizable_buffer_new_raw(&arrow_buffer); + } else { + return NULL; + } +} + + +/** + * garrow_resizable_buffer_resize: + * @buffer: A #GArrowResizableBuffer. + * @new_size: The new buffer size in bytes. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.3.0 + */ +gboolean +garrow_resizable_buffer_resize(GArrowResizableBuffer *buffer, + gint64 new_size, + GError **error) +{ + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + auto arrow_resizable_buffer = + std::static_pointer_cast<arrow::ResizableBuffer>(arrow_buffer); + auto status = arrow_resizable_buffer->Resize(new_size); + return garrow_error_check(error, status, "[resizable-buffer][resize]"); +} + +/** + * garrow_resizable_buffer_reserve: + * @buffer: A #GArrowResizableBuffer. + * @new_capacity: The new buffer capacity in bytes. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.3.0 + */ +gboolean +garrow_resizable_buffer_reserve(GArrowResizableBuffer *buffer, + gint64 new_capacity, + GError **error) +{ + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + auto arrow_resizable_buffer = + std::static_pointer_cast<arrow::ResizableBuffer>(arrow_buffer); + auto status = arrow_resizable_buffer->Reserve(new_capacity); + return garrow_error_check(error, status, "[resizable-buffer][capacity]"); +} + + +G_END_DECLS + +GArrowBuffer * +garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer) +{ + return garrow_buffer_new_raw_bytes(arrow_buffer, nullptr); +} + +GArrowBuffer * +garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GBytes *data) +{ + auto buffer = GARROW_BUFFER(g_object_new(GARROW_TYPE_BUFFER, + "buffer", arrow_buffer, + "data", data, + NULL)); + return buffer; +} + +GArrowBuffer * +garrow_buffer_new_raw_parent(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GArrowBuffer *parent) +{ + auto buffer = GARROW_BUFFER(g_object_new(G_OBJECT_TYPE(parent), + "buffer", arrow_buffer, + "parent", parent, + NULL)); + return buffer; +} + +std::shared_ptr<arrow::Buffer> +garrow_buffer_get_raw(GArrowBuffer *buffer) +{ + if (!buffer) 
+ return nullptr; + + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + return priv->buffer; +} + +GArrowMutableBuffer * +garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer) +{ + return garrow_mutable_buffer_new_raw_bytes(arrow_buffer, nullptr); +} + +GArrowMutableBuffer * +garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer, + GBytes *data) +{ + auto buffer = GARROW_MUTABLE_BUFFER(g_object_new(GARROW_TYPE_MUTABLE_BUFFER, + "buffer", arrow_buffer, + "data", data, + NULL)); + return buffer; +} + +GArrowResizableBuffer * +garrow_resizable_buffer_new_raw(std::shared_ptr<arrow::ResizableBuffer> *arrow_buffer) +{ + auto buffer = + GARROW_RESIZABLE_BUFFER(g_object_new(GARROW_TYPE_RESIZABLE_BUFFER, + "buffer", arrow_buffer, + NULL)); + return buffer; +} diff --git a/src/arrow/c_glib/arrow-glib/buffer.h b/src/arrow/c_glib/arrow-glib/buffer.h new file mode 100644 index 000000000..a176071f2 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/buffer.h @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/gobject-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_BUFFER (garrow_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBuffer, + garrow_buffer, + GARROW, + BUFFER, + GObject) +struct _GArrowBufferClass +{ + GObjectClass parent_class; +}; + +GArrowBuffer *garrow_buffer_new (const guint8 *data, + gint64 size); +GArrowBuffer *garrow_buffer_new_bytes (GBytes *data); +gboolean garrow_buffer_equal (GArrowBuffer *buffer, + GArrowBuffer *other_buffer); +gboolean garrow_buffer_equal_n_bytes(GArrowBuffer *buffer, + GArrowBuffer *other_buffer, + gint64 n_bytes); +gboolean garrow_buffer_is_mutable (GArrowBuffer *buffer); +gint64 garrow_buffer_get_capacity (GArrowBuffer *buffer); +GBytes *garrow_buffer_get_data (GArrowBuffer *buffer); +GBytes *garrow_buffer_get_mutable_data(GArrowBuffer *buffer); +gint64 garrow_buffer_get_size (GArrowBuffer *buffer); +GArrowBuffer *garrow_buffer_get_parent (GArrowBuffer *buffer); + +GArrowBuffer *garrow_buffer_copy (GArrowBuffer *buffer, + gint64 start, + gint64 size, + GError **error); +GArrowBuffer *garrow_buffer_slice (GArrowBuffer *buffer, + gint64 offset, + gint64 size); + + +#define GARROW_TYPE_MUTABLE_BUFFER (garrow_mutable_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMutableBuffer, + garrow_mutable_buffer, + GARROW, + MUTABLE_BUFFER, + GArrowBuffer) +struct _GArrowMutableBufferClass +{ + GArrowBufferClass parent_class; +}; + +GArrowMutableBuffer *garrow_mutable_buffer_new (guint8 *data, + gint64 size); +GArrowMutableBuffer *garrow_mutable_buffer_new_bytes(GBytes *data); +GArrowMutableBuffer *garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, + gint64 offset, + gint64 size); +gboolean garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer, + gint64 offset, + const guint8 *data, + gint64 size, + GError **error); + + +#define GARROW_TYPE_RESIZABLE_BUFFER (garrow_resizable_buffer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowResizableBuffer, + garrow_resizable_buffer, + GARROW, + 
RESIZABLE_BUFFER, + GArrowMutableBuffer) +struct _GArrowResizableBufferClass +{ + GArrowMutableBufferClass parent_class; +}; + + +GArrowResizableBuffer *garrow_resizable_buffer_new(gint64 initial_size, + GError **error); +gboolean garrow_resizable_buffer_resize(GArrowResizableBuffer *buffer, + gint64 new_size, + GError **error); +gboolean garrow_resizable_buffer_reserve(GArrowResizableBuffer *buffer, + gint64 new_capacity, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/buffer.hpp b/src/arrow/c_glib/arrow-glib/buffer.hpp new file mode 100644 index 000000000..34a28be46 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/buffer.hpp @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/buffer.h> + +GArrowBuffer * +garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer); +GArrowBuffer * +garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GBytes *data); +GArrowBuffer * +garrow_buffer_new_raw_parent(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GArrowBuffer *parent); +std::shared_ptr<arrow::Buffer> +garrow_buffer_get_raw(GArrowBuffer *buffer); + +GArrowMutableBuffer * +garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer); +GArrowMutableBuffer * +garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer, + GBytes *data); +GArrowResizableBuffer * +garrow_resizable_buffer_new_raw(std::shared_ptr<arrow::ResizableBuffer> *arrow_buffer); diff --git a/src/arrow/c_glib/arrow-glib/chunked-array.cpp b/src/arrow/c_glib/arrow-glib/chunked-array.cpp new file mode 100644 index 000000000..51ca41693 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/chunked-array.cpp @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/type.hpp> +#include <arrow-glib/error.hpp> + +#include <sstream> + +G_BEGIN_DECLS + +/** + * SECTION: chunked-array + * @short_description: Chunked array class + * + * #GArrowChunkedArray is a class for chunked array. Chunked array + * makes a list of #GArrowArrays one logical large array. + */ + +typedef struct GArrowChunkedArrayPrivate_ { + std::shared_ptr<arrow::ChunkedArray> chunked_array; +} GArrowChunkedArrayPrivate; + +enum { + PROP_0, + PROP_CHUNKED_ARRAY +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowChunkedArray, + garrow_chunked_array, + G_TYPE_OBJECT) + +#define GARROW_CHUNKED_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowChunkedArrayPrivate *>( \ + garrow_chunked_array_get_instance_private( \ + GARROW_CHUNKED_ARRAY(obj))) + +static void +garrow_chunked_array_finalize(GObject *object) +{ + auto priv = GARROW_CHUNKED_ARRAY_GET_PRIVATE(object); + + priv->chunked_array.~shared_ptr(); + + G_OBJECT_CLASS(garrow_chunked_array_parent_class)->finalize(object); +} + +static void +garrow_chunked_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CHUNKED_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CHUNKED_ARRAY: + priv->chunked_array = + *static_cast<std::shared_ptr<arrow::ChunkedArray> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_chunked_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_chunked_array_init(GArrowChunkedArray *object) +{ + auto priv = GARROW_CHUNKED_ARRAY_GET_PRIVATE(object); + new(&priv->chunked_array) std::shared_ptr<arrow::ChunkedArray>; +} + +static 
void +garrow_chunked_array_class_init(GArrowChunkedArrayClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_chunked_array_finalize; + gobject_class->set_property = garrow_chunked_array_set_property; + gobject_class->get_property = garrow_chunked_array_get_property; + + spec = g_param_spec_pointer("chunked-array", + "Chunked array", + "The raw std::shared<arrow::ChunkedArray> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CHUNKED_ARRAY, spec); +} + +/** + * garrow_chunked_array_new: + * @chunks: (element-type GArrowArray): The array chunks. + * + * Returns: A newly created #GArrowChunkedArray. + */ +GArrowChunkedArray * +garrow_chunked_array_new(GList *chunks) +{ + std::vector<std::shared_ptr<arrow::Array>> arrow_chunks; + for (GList *node = chunks; node; node = node->next) { + GArrowArray *chunk = GARROW_ARRAY(node->data); + arrow_chunks.push_back(garrow_array_get_raw(chunk)); + } + + auto arrow_chunked_array = + std::make_shared<arrow::ChunkedArray>(arrow_chunks); + return garrow_chunked_array_new_raw(&arrow_chunked_array); +} + +/** + * garrow_chunked_array_equal: + * @chunked_array: A #GArrowChunkedArray. + * @other_chunked_array: A #GArrowChunkedArray to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_chunked_array_equal(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *other_chunked_array) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + const auto arrow_other_chunked_array = + garrow_chunked_array_get_raw(other_chunked_array); + return arrow_chunked_array->Equals(arrow_other_chunked_array); +} + +/** + * garrow_chunked_array_get_value_data_type: + * @chunked_array: A #GArrowChunkedArray. 
+ * + * Returns: (transfer full): The #GArrowDataType of the value of + * the chunked array. + * + * Since: 0.9.0 + */ +GArrowDataType * +garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_type = arrow_chunked_array->type(); + return garrow_data_type_new_raw(&arrow_type); +} + +/** + * garrow_chunked_array_get_value_type: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: The #GArrowType of the value of the chunked array. + * + * Since: 0.9.0 + */ +GArrowType +garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_type = arrow_chunked_array->type(); + return garrow_type_from_raw(arrow_type->id()); +} + +/** + * garrow_chunked_array_get_length: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: The total number of rows in the chunked array. + * + * Deprecated: 0.15.0: Use garrow_chunked_array_get_n_rows() instead. + */ +guint64 +garrow_chunked_array_get_length(GArrowChunkedArray *chunked_array) +{ + return garrow_chunked_array_get_n_rows(chunked_array); +} + +/** + * garrow_chunked_array_get_n_rows: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: The total number of rows in the chunked array. + * + * Since: 0.15.0 + */ +guint64 +garrow_chunked_array_get_n_rows(GArrowChunkedArray *chunked_array) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return arrow_chunked_array->length(); +} + +/** + * garrow_chunked_array_get_n_nulls: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: The total number of NULL in the chunked array. 
+ */ +guint64 +garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return arrow_chunked_array->null_count(); +} + +/** + * garrow_chunked_array_get_n_chunks: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: The total number of chunks in the chunked array. + */ +guint +garrow_chunked_array_get_n_chunks(GArrowChunkedArray *chunked_array) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return arrow_chunked_array->num_chunks(); +} + +/** + * garrow_chunked_array_get_chunk: + * @chunked_array: A #GArrowChunkedArray. + * @i: The index of the target chunk. + * + * Returns: (transfer full): The i-th chunk of the chunked array. + */ +GArrowArray * +garrow_chunked_array_get_chunk(GArrowChunkedArray *chunked_array, + guint i) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_chunk = arrow_chunked_array->chunk(i); + return garrow_array_new_raw(&arrow_chunk); +} + +/** + * garrow_chunked_array_get_chunks: + * @chunked_array: A #GArrowChunkedArray. + * + * Returns: (element-type GArrowArray) (transfer full): + * The chunks in the chunked array. + */ +GList * +garrow_chunked_array_get_chunks(GArrowChunkedArray *chunked_array) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + + GList *chunks = NULL; + for (auto arrow_chunk : arrow_chunked_array->chunks()) { + GArrowArray *chunk = garrow_array_new_raw(&arrow_chunk); + chunks = g_list_prepend(chunks, chunk); + } + + return g_list_reverse(chunks); +} + +/** + * garrow_chunked_array_slice: + * @chunked_array: A #GArrowChunkedArray. + * @offset: The offset of sub #GArrowChunkedArray. + * @length: The length of sub #GArrowChunkedArray. + * + * Returns: (transfer full): The sub #GArrowChunkedArray. It covers only from + * `offset` to `offset + length` range. 
The sub #GArrowChunkedArray shares + * values with the base #GArrowChunkedArray. + */ +GArrowChunkedArray * +garrow_chunked_array_slice(GArrowChunkedArray *chunked_array, + guint64 offset, + guint64 length) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_sub_chunked_array = arrow_chunked_array->Slice(offset, length); + return garrow_chunked_array_new_raw(&arrow_sub_chunked_array); +} + +/** + * garrow_chunked_array_to_string: + * @chunked_array: A #GArrowChunkedArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * The formatted chunked array content or %NULL on error. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.11.0 + */ +gchar * +garrow_chunked_array_to_string(GArrowChunkedArray *chunked_array, GError **error) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return g_strdup(arrow_chunked_array->ToString().c_str()); +} + +/** + * garrow_chunked_array_combine: + * @chunked_array: A #GArrowChunkedArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The combined array that has + * all data in all chunks. 
+ * + * Since: 4.0.0 + */ +GArrowArray * +garrow_chunked_array_combine(GArrowChunkedArray *chunked_array, GError **error) +{ + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_combined_array = arrow::Concatenate(arrow_chunked_array->chunks()); + if (garrow::check(error, + arrow_combined_array, + "[chunked-array][combine]")) { + return garrow_array_new_raw(&(*arrow_combined_array)); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowChunkedArray * +garrow_chunked_array_new_raw(std::shared_ptr<arrow::ChunkedArray> *arrow_chunked_array) +{ + auto chunked_array = + GARROW_CHUNKED_ARRAY(g_object_new(GARROW_TYPE_CHUNKED_ARRAY, + "chunked-array", arrow_chunked_array, + NULL)); + return chunked_array; +} + +std::shared_ptr<arrow::ChunkedArray> +garrow_chunked_array_get_raw(GArrowChunkedArray *chunked_array) +{ + auto priv = GARROW_CHUNKED_ARRAY_GET_PRIVATE(chunked_array); + return priv->chunked_array; +} diff --git a/src/arrow/c_glib/arrow-glib/chunked-array.h b/src/arrow/c_glib/arrow-glib/chunked-array.h new file mode 100644 index 000000000..8e721f0bf --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/chunked-array.h @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_CHUNKED_ARRAY (garrow_chunked_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowChunkedArray, + garrow_chunked_array, + GARROW, + CHUNKED_ARRAY, + GObject) +struct _GArrowChunkedArrayClass +{ + GObjectClass parent_class; +}; + +GArrowChunkedArray *garrow_chunked_array_new(GList *chunks); + +gboolean garrow_chunked_array_equal(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *other_chunked_array); + +GArrowDataType * +garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array); +GArrowType +garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array); + +GARROW_DEPRECATED_IN_0_15_FOR(garrow_chunked_array_get_n_rows) +guint64 garrow_chunked_array_get_length (GArrowChunkedArray *chunked_array); +GARROW_AVAILABLE_IN_0_15 +guint64 garrow_chunked_array_get_n_rows (GArrowChunkedArray *chunked_array); +guint64 garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array); +guint garrow_chunked_array_get_n_chunks (GArrowChunkedArray *chunked_array); + +GArrowArray *garrow_chunked_array_get_chunk(GArrowChunkedArray *chunked_array, + guint i); +GList *garrow_chunked_array_get_chunks(GArrowChunkedArray *chunked_array); +GArrowChunkedArray *garrow_chunked_array_slice(GArrowChunkedArray *chunked_array, + guint64 offset, + guint64 length); +gchar *garrow_chunked_array_to_string(GArrowChunkedArray *chunked_array, + GError **error); +GARROW_AVAILABLE_IN_4_0 +GArrowArray *garrow_chunked_array_combine(GArrowChunkedArray *chunked_array, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/chunked-array.hpp b/src/arrow/c_glib/arrow-glib/chunked-array.hpp new file mode 100644 index 000000000..ec5068adc --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/chunked-array.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/chunked-array.h> + +GArrowChunkedArray *garrow_chunked_array_new_raw(std::shared_ptr<arrow::ChunkedArray> *arrow_chunked_array); +std::shared_ptr<arrow::ChunkedArray> garrow_chunked_array_get_raw(GArrowChunkedArray *chunked_array); diff --git a/src/arrow/c_glib/arrow-glib/codec.cpp b/src/arrow/c_glib/arrow-glib/codec.cpp new file mode 100644 index 000000000..fecf97704 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/codec.cpp @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/codec.hpp> +#include <arrow-glib/error.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: codec + * @title: Codec related type and class + * @include: arrow-glib/arrow-glib.h + * + * #GArrowCompressionType provides compression types corresponding to + * `arrow::Compression::type`. + * + * #GArrowCodec is a class for compressing and decompressing data. + */ + +typedef struct GArrowCodecPrivate_ { + std::shared_ptr<arrow::util::Codec> codec; +} GArrowCodecPrivate; + +enum { + PROP_CODEC = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCodec, garrow_codec, G_TYPE_OBJECT) + +#define GARROW_CODEC_GET_PRIVATE(object) \ + static_cast<GArrowCodecPrivate *>( \ + garrow_codec_get_instance_private( \ + GARROW_CODEC(object))) + +static void +garrow_codec_finalize(GObject *object) +{ + auto priv = GARROW_CODEC_GET_PRIVATE(object); + + priv->codec.~shared_ptr(); + + G_OBJECT_CLASS(garrow_codec_parent_class)->finalize(object); +} + +static void +garrow_codec_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CODEC_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CODEC: + priv->codec = + *static_cast<std::shared_ptr<arrow::util::Codec> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_codec_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + 
} +} + +static void +garrow_codec_init(GArrowCodec *object) +{ + auto priv = GARROW_CODEC_GET_PRIVATE(object); + new(&priv->codec) std::shared_ptr<arrow::util::Codec>; +} + +static void +garrow_codec_class_init(GArrowCodecClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_codec_finalize; + gobject_class->set_property = garrow_codec_set_property; + gobject_class->get_property = garrow_codec_get_property; + + spec = g_param_spec_pointer("codec", + "Codec", + "The raw std::shared_ptr<arrow::util::Codec> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CODEC, spec); +} + +/** + * garrow_codec_new: + * @type: A #GArrowCompressionType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCodec on success, %NULL on error. + * + * Since: 0.12.0 + */ +GArrowCodec * +garrow_codec_new(GArrowCompressionType type, + GError **error) +{ + auto arrow_type = garrow_compression_type_to_raw(type); + auto arrow_codec = arrow::util::Codec::Create(arrow_type); + if (garrow::check(error, arrow_codec, "[codec][new]")) { + std::shared_ptr<arrow::util::Codec> arrow_codec_shared = + std::move(*arrow_codec); + return garrow_codec_new_raw(&arrow_codec_shared); + } else { + return NULL; + } +} + +/** + * garrow_codec_get_name: + * @codec: A #GArrowCodec. + * + * Returns: The name of the codec. + * + * Since: 0.12.0 + */ +const gchar * +garrow_codec_get_name(GArrowCodec *codec) +{ + auto arrow_codec = garrow_codec_get_raw(codec); + if (!arrow_codec) { + return NULL; + } + return arrow_codec->name().c_str(); +} + +/** + * garrow_codec_get_compression_type: + * @codec: A #GArrowCodec. + * + * Returns: The compression type of the codec. 
+ * + * Since: 2.0.0 + */ +GArrowCompressionType +garrow_codec_get_compression_type(GArrowCodec *codec) +{ + auto arrow_codec = garrow_codec_get_raw(codec); + if (!arrow_codec) { + return GARROW_COMPRESSION_TYPE_UNCOMPRESSED; + } + return garrow_compression_type_from_raw(arrow_codec->compression_type()); +} + +/** + * garrow_codec_get_compression_level: + * @codec: A #GArrowCodec. + * + * Returns: The compression level of the codec. + * + * Since: 2.0.0 + */ +gint +garrow_codec_get_compression_level(GArrowCodec *codec) +{ + auto arrow_codec = garrow_codec_get_raw(codec); + if (!arrow_codec) { + return arrow::util::Codec::UseDefaultCompressionLevel(); + } + return arrow_codec->compression_level(); +} + +G_END_DECLS + +GArrowCompressionType +garrow_compression_type_from_raw(arrow::Compression::type arrow_type) +{ + switch (arrow_type) { + case arrow::Compression::type::UNCOMPRESSED: + return GARROW_COMPRESSION_TYPE_UNCOMPRESSED; + case arrow::Compression::type::SNAPPY: + return GARROW_COMPRESSION_TYPE_SNAPPY; + case arrow::Compression::type::GZIP: + return GARROW_COMPRESSION_TYPE_GZIP; + case arrow::Compression::type::BROTLI: + return GARROW_COMPRESSION_TYPE_BROTLI; + case arrow::Compression::type::ZSTD: + return GARROW_COMPRESSION_TYPE_ZSTD; + case arrow::Compression::type::LZ4: + return GARROW_COMPRESSION_TYPE_LZ4; + case arrow::Compression::type::LZO: + return GARROW_COMPRESSION_TYPE_LZO; + case arrow::Compression::type::BZ2: + return GARROW_COMPRESSION_TYPE_BZ2; + default: + return GARROW_COMPRESSION_TYPE_UNCOMPRESSED; + } +} + +arrow::Compression::type +garrow_compression_type_to_raw(GArrowCompressionType type) +{ + switch (type) { + case GARROW_COMPRESSION_TYPE_UNCOMPRESSED: + return arrow::Compression::type::UNCOMPRESSED; + case GARROW_COMPRESSION_TYPE_SNAPPY: + return arrow::Compression::type::SNAPPY; + case GARROW_COMPRESSION_TYPE_GZIP: + return arrow::Compression::type::GZIP; + case GARROW_COMPRESSION_TYPE_BROTLI: + return arrow::Compression::type::BROTLI; 
+ case GARROW_COMPRESSION_TYPE_ZSTD: + return arrow::Compression::type::ZSTD; + case GARROW_COMPRESSION_TYPE_LZ4: + return arrow::Compression::type::LZ4; + case GARROW_COMPRESSION_TYPE_LZO: + return arrow::Compression::type::LZO; + case GARROW_COMPRESSION_TYPE_BZ2: + return arrow::Compression::type::BZ2; + default: + return arrow::Compression::type::UNCOMPRESSED; + } +} + +GArrowCodec * +garrow_codec_new_raw(std::shared_ptr<arrow::util::Codec> *arrow_codec) +{ + auto codec = GARROW_CODEC(g_object_new(GARROW_TYPE_CODEC, + "codec", arrow_codec, + NULL)); + return codec; +} + +std::shared_ptr<arrow::util::Codec> +garrow_codec_get_raw(GArrowCodec *codec) +{ + auto priv = GARROW_CODEC_GET_PRIVATE(codec); + return priv->codec; +} diff --git a/src/arrow/c_glib/arrow-glib/codec.h b/src/arrow/c_glib/arrow-glib/codec.h new file mode 100644 index 000000000..6e177af9e --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/codec.h @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +/** + * GArrowCompressionType: + * @GARROW_COMPRESSION_TYPE_UNCOMPRESSED: Not compressed. 
+ * @GARROW_COMPRESSION_TYPE_SNAPPY: Snappy compression. + * @GARROW_COMPRESSION_TYPE_GZIP: gzip compression. + * @GARROW_COMPRESSION_TYPE_BROTLI: Brotli compression. + * @GARROW_COMPRESSION_TYPE_ZSTD: Zstandard compression. + * @GARROW_COMPRESSION_TYPE_LZ4: LZ4 compression. + * @GARROW_COMPRESSION_TYPE_LZO: LZO compression. + * @GARROW_COMPRESSION_TYPE_BZ2: bzip2 compression. + * + * They correspond to `arrow::Compression::type` values. + */ +typedef enum { + GARROW_COMPRESSION_TYPE_UNCOMPRESSED, + GARROW_COMPRESSION_TYPE_SNAPPY, + GARROW_COMPRESSION_TYPE_GZIP, + GARROW_COMPRESSION_TYPE_BROTLI, + GARROW_COMPRESSION_TYPE_ZSTD, + GARROW_COMPRESSION_TYPE_LZ4, + GARROW_COMPRESSION_TYPE_LZO, + GARROW_COMPRESSION_TYPE_BZ2 +} GArrowCompressionType; + + +#define GARROW_TYPE_CODEC (garrow_codec_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCodec, + garrow_codec, + GARROW, + CODEC, + GObject) +struct _GArrowCodecClass +{ + GObjectClass parent_class; +}; + +GArrowCodec *garrow_codec_new(GArrowCompressionType type, + GError **error); + +const gchar *garrow_codec_get_name(GArrowCodec *codec); +GARROW_AVAILABLE_IN_2_0 +GArrowCompressionType +garrow_codec_get_compression_type(GArrowCodec *codec); +GARROW_AVAILABLE_IN_2_0 +gint +garrow_codec_get_compression_level(GArrowCodec *codec); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/codec.hpp b/src/arrow/c_glib/arrow-glib/codec.hpp new file mode 100644 index 000000000..f4cfaba18 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/codec.hpp @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/util/compression.h> + +#include <arrow-glib/codec.h> + +GArrowCompressionType +garrow_compression_type_from_raw(arrow::Compression::type arrow_type); +arrow::Compression::type +garrow_compression_type_to_raw(GArrowCompressionType type); + +GArrowCodec * +garrow_codec_new_raw(std::shared_ptr<arrow::util::Codec> *arrow_codec); +std::shared_ptr<arrow::util::Codec> +garrow_codec_get_raw(GArrowCodec *codec); diff --git a/src/arrow/c_glib/arrow-glib/composite-array.cpp b/src/arrow/c_glib/arrow-glib/composite-array.cpp new file mode 100644 index 000000000..fd777f586 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/composite-array.cpp @@ -0,0 +1,1706 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/compute.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/type.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: composite-array + * @section_id: composite-array-classes + * @title: Composite array classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowListArray is a class for list array. It can store zero or + * more list data. If you don't have Arrow format data, you need to + * use #GArrowListArrayBuilder to create a new array. + * + * #GArrowLargeListArray is a class for 64-bit offsets list array. + * It can store zero or more list data. If you don't have Arrow format data, + * you need to use #GArrowLargeListArrayBuilder to create a new array. + * + * #GArrowStructArray is a class for struct array. It can store zero + * or more structs. One struct has one or more fields. If you don't + * have Arrow format data, you need to use #GArrowStructArrayBuilder + * to create a new array. + * + * #GArrowMapArray is a class for map array. It can store + * data with keys and items. + * + * #GArrowUnionArray is a base class for union array. It can store + * zero or more unions. One union has one or more fields but one union + * can store only one field value. + * + * #GArrowDenseUnionArray is a class for dense union array. + * + * #GArrowSparseUnionArray is a class for sparse union array. + * + * #GArrowDictionaryArray is a class for dictionary array. It can + * store data with dictionary and indices. It's more space efficient than + * a normal array when the array has many identical values. You can convert a + * normal array to dictionary array by garrow_array_dictionary_encode().
+ */ + +typedef struct GArrowListArrayPrivate_ { + GArrowArray *raw_values; +} GArrowListArrayPrivate; + +enum { + PROP_RAW_VALUES = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowListArray, + garrow_list_array, + GARROW_TYPE_ARRAY) + +#define GARROW_LIST_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowListArrayPrivate *>( \ + garrow_list_array_get_instance_private( \ + GARROW_LIST_ARRAY(obj))) + +G_END_DECLS +template <typename LIST_ARRAY_CLASS> +GArrowArray * +garrow_base_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer *value_offsets, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + const auto arrow_value_offsets = garrow_buffer_get_raw(value_offsets); + const auto arrow_values = garrow_array_get_raw(values); + const auto arrow_null_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_list_array = + std::make_shared<LIST_ARRAY_CLASS>(arrow_data_type, + length, + arrow_value_offsets, + arrow_values, + arrow_null_bitmap, + n_nulls); + auto arrow_array = + std::static_pointer_cast<arrow::Array>(arrow_list_array); + return garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "value-data-type", data_type, + "null-bitmap", null_bitmap, + "buffer1", value_offsets, + "raw-values", values, + NULL); +}; + +template <typename LIST_ARRAY_CLASS> +GArrowDataType * +garrow_base_list_array_get_value_type(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + auto arrow_value_type = arrow_list_array->value_type(); + return garrow_data_type_new_raw(&arrow_value_type); +}; + +template <typename LIST_ARRAY_CLASS> +GArrowArray * +garrow_base_list_array_get_value(GArrowArray *array, + gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + auto arrow_list = 
arrow_list_array->value_slice(i); + return garrow_array_new_raw(&arrow_list, + "array", &arrow_list, + "parent", array, + NULL); +}; + +template <typename LIST_ARRAY_CLASS> +GArrowArray * +garrow_base_list_array_get_values(GArrowArray *array) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + auto arrow_values = arrow_list_array->values(); + return garrow_array_new_raw(&arrow_values, + "array", &arrow_values, + "parent", array, + NULL); +}; + +template <typename LIST_ARRAY_CLASS> +typename LIST_ARRAY_CLASS::offset_type +garrow_base_list_array_get_value_offset(GArrowArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + return arrow_list_array->value_offset(i); +}; + +template <typename LIST_ARRAY_CLASS> +typename LIST_ARRAY_CLASS::offset_type +garrow_base_list_array_get_value_length(GArrowArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + return arrow_list_array->value_length(i); +}; + +template <typename LIST_ARRAY_CLASS> +const typename LIST_ARRAY_CLASS::offset_type * +garrow_base_list_array_get_value_offsets(GArrowArray *array, gint64 *n_offsets) +{ + auto arrow_array = garrow_array_get_raw(array); + *n_offsets = arrow_array->length() + 1; + auto arrow_list_array = + std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array); + return arrow_list_array->raw_value_offsets(); +}; + + +G_BEGIN_DECLS + +static void +garrow_list_array_dispose(GObject *object) +{ + auto priv = GARROW_LIST_ARRAY_GET_PRIVATE(object); + + if (priv->raw_values) { + g_object_unref(priv->raw_values); + priv->raw_values = NULL; + } + + + G_OBJECT_CLASS(garrow_list_array_parent_class)->dispose(object); +} + +static void +garrow_list_array_set_property(GObject *object, + guint prop_id, + 
const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + priv->raw_values = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + g_value_set_object(value, priv->raw_values); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_array_init(GArrowListArray *object) +{ +} + +static void +garrow_list_array_class_init(GArrowListArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_list_array_dispose; + gobject_class->set_property = garrow_list_array_set_property; + gobject_class->get_property = garrow_list_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("raw-values", + "Raw values", + "The raw values", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW_VALUES, spec); +} + +/** + * garrow_list_array_new: + * @data_type: The data type of the list. + * @length: The number of elements. + * @value_offsets: The offsets of @values in Arrow format. + * @values: The values as #GArrowArray. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowListArray. 
+ * + * Since: 0.4.0 + */ +GArrowListArray * +garrow_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer *value_offsets, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto list_array = garrow_base_list_array_new<arrow::ListArray>( + data_type, + length, + value_offsets, + values, + null_bitmap, + n_nulls); + return GARROW_LIST_ARRAY(list_array); +} + +/** + * garrow_list_array_get_value_type: + * @array: A #GArrowListArray. + * + * Returns: (transfer full): The data type of value in each list. + */ +GArrowDataType * +garrow_list_array_get_value_type(GArrowListArray *array) +{ + return garrow_base_list_array_get_value_type<arrow::ListArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_list_array_get_value: + * @array: A #GArrowListArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The i-th list. + */ +GArrowArray * +garrow_list_array_get_value(GArrowListArray *array, + gint64 i) +{ + return garrow_base_list_array_get_value<arrow::ListArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_list_array_get_values: + * @array: A #GArrowListArray. + * + * Returns: (transfer full): The array containing the list's values. + * + * Since: 2.0.0 + */ +GArrowArray * +garrow_list_array_get_values(GArrowListArray *array) +{ + return garrow_base_list_array_get_values<arrow::ListArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_list_array_get_value_offset: + * @array: A #GArrowListArray. + * @i: The index of the offset of the target value. + * + * Returns: The target offset in the array containing the list's values. + * + * Since: 2.0.0 + */ +gint32 +garrow_list_array_get_value_offset(GArrowListArray *array, gint64 i) +{ + return garrow_base_list_array_get_value_offset<arrow::ListArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_list_array_get_value_length: + * @array: A #GArrowListArray. + * @i: The index of the length of the target value.
+ * + * Returns: The target length in the array containing the list's values. + * + * Since: 2.0.0 + */ +gint32 +garrow_list_array_get_value_length(GArrowListArray *array, gint64 i) +{ + return garrow_base_list_array_get_value_length<arrow::ListArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_list_array_get_value_offsets: + * @array: A #GArrowListArray. + * @n_offsets: The number of offsets to be returned. + * + * Returns: (array length=n_offsets): The target offsets in the array + * containing the list's values. + * + * Since: 2.0.0 + */ +const gint32 * +garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets) +{ + return garrow_base_list_array_get_value_offsets<arrow::ListArray>( + GARROW_ARRAY(array), n_offsets); +} + + +typedef struct GArrowLargeListArrayPrivate_ { + GArrowArray *raw_values; +} GArrowLargeListArrayPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowLargeListArray, + garrow_large_list_array, + GARROW_TYPE_ARRAY) + +#define GARROW_LARGE_LIST_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowLargeListArrayPrivate *>( \ + garrow_large_list_array_get_instance_private( \ + GARROW_LARGE_LIST_ARRAY(obj))) + +static void +garrow_large_list_array_dispose(GObject *object) +{ + auto priv = GARROW_LARGE_LIST_ARRAY_GET_PRIVATE(object); + + if (priv->raw_values) { + g_object_unref(priv->raw_values); + priv->raw_values = NULL; + } + + G_OBJECT_CLASS(garrow_large_list_array_parent_class)->dispose(object); +} + +static void +garrow_large_list_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LARGE_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + priv->raw_values = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_large_list_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_LARGE_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + g_value_set_object(value, priv->raw_values); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_large_list_array_init(GArrowLargeListArray *object) +{ +} + +static void +garrow_large_list_array_class_init(GArrowLargeListArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_large_list_array_dispose; + gobject_class->set_property = garrow_large_list_array_set_property; + gobject_class->get_property = garrow_large_list_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("raw-values", + "Raw values", + "The raw values", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW_VALUES, spec); +} + +/** + * garrow_large_list_array_new: + * @data_type: The data type of the list. + * @length: The number of elements. + * @value_offsets: The offsets of @values in Arrow format. + * @values: The values as #GArrowArray. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowLargeListArray. 
+ * + * Since: 0.16.0 + */ +GArrowLargeListArray * +garrow_large_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer *value_offsets, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + auto large_list_array = garrow_base_list_array_new<arrow::LargeListArray>( + data_type, + length, + value_offsets, + values, + null_bitmap, + n_nulls); + return GARROW_LARGE_LIST_ARRAY(large_list_array); +} + +/** + * garrow_large_list_array_get_value_type: + * @array: A #GArrowLargeListArray. + * + * Returns: (transfer full): The data type of value in each list. + * + * Since: 0.16.0 + */ +GArrowDataType * +garrow_large_list_array_get_value_type(GArrowLargeListArray *array) +{ + return garrow_base_list_array_get_value_type<arrow::LargeListArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_large_list_array_get_value: + * @array: A #GArrowLargeListArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th list. + * + * Since: 0.16.0 + */ +GArrowArray * +garrow_large_list_array_get_value(GArrowLargeListArray *array, + gint64 i) +{ + return garrow_base_list_array_get_value<arrow::LargeListArray>( + GARROW_ARRAY(array), + i); +} + +/** + * garrow_large_list_array_get_values: + * @array: A #GArrowLargeListArray. + * + * Returns: (transfer full): The array containing the list's values. + * + * Since: 2.0.0 + */ +GArrowArray * +garrow_large_list_array_get_values(GArrowLargeListArray *array) +{ + return garrow_base_list_array_get_values<arrow::LargeListArray>( + GARROW_ARRAY(array)); +} + +/** + * garrow_large_list_array_get_value_offset: + * @array: A #GArrowLargeListArray. + * @i: The index of the offset of the target value. + * + * Returns: The target offset in the array containing the list's values. 
+ * + * Since: 2.0.0 + */ +gint64 +garrow_large_list_array_get_value_offset(GArrowLargeListArray *array, gint64 i) +{ + return garrow_base_list_array_get_value_offset<arrow::LargeListArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_large_list_array_get_value_length: + * @array: A #GArrowLargeListArray. + * @i: The index of the length of the target value. + * + * Returns: The target length in the array containing the list's values. + * + * Since: 2.0.0 + */ +gint64 +garrow_large_list_array_get_value_length(GArrowLargeListArray *array, gint64 i) +{ + return garrow_base_list_array_get_value_length<arrow::LargeListArray>( + GARROW_ARRAY(array), i); +} + +/** + * garrow_large_list_array_get_value_offsets: + * @array: A #GArrowLargeListArray. + * @n_offsets: The number of offsets to be returned. + * + * Returns: (array length=n_offsets): The target offsets in the array + * containing the list's values. + * + * Since: 2.0.0 + */ +const gint64 * +garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, + gint64 *n_offsets) +{ + return garrow_base_list_array_get_value_offsets<arrow::LargeListArray>( + GARROW_ARRAY(array), n_offsets); +} + + +typedef struct GArrowStructArrayPrivate_ { + GPtrArray *fields; +} GArrowStructArrayPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowStructArray, + garrow_struct_array, + GARROW_TYPE_ARRAY) + +#define GARROW_STRUCT_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowStructArrayPrivate *>( \ + garrow_struct_array_get_instance_private( \ + GARROW_STRUCT_ARRAY(obj))) + +static void +garrow_struct_array_dispose(GObject *object) +{ + auto priv = GARROW_STRUCT_ARRAY_GET_PRIVATE(object); + + if (priv->fields) { + g_ptr_array_free(priv->fields, TRUE); + priv->fields = NULL; + } + + G_OBJECT_CLASS(garrow_struct_array_parent_class)->dispose(object); +} + +static void +garrow_struct_array_init(GArrowStructArray *object) +{ +} + +static void +garrow_struct_array_class_init(GArrowStructArrayClass *klass) +{ + auto gobject_class =
G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_struct_array_dispose; +} + +/** + * garrow_struct_array_new: + * @data_type: The data type of the struct. + * @length: The number of elements. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * + * Returns: A newly created #GArrowStructArray. + * + * Since: 0.4.0 + */ +GArrowStructArray * +garrow_struct_array_new(GArrowDataType *data_type, + gint64 length, + GList *fields, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + const auto arrow_null_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_struct_array = + std::make_shared<arrow::StructArray>(arrow_data_type, + length, + arrow_fields, + arrow_null_bitmap, + n_nulls); + auto arrow_array = + std::static_pointer_cast<arrow::Array>(arrow_struct_array); + auto struct_array = + garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "null-bitmap", null_bitmap, + NULL); + auto priv = GARROW_STRUCT_ARRAY_GET_PRIVATE(struct_array); + priv->fields = g_ptr_array_sized_new(arrow_fields.size()); + g_ptr_array_set_free_func(priv->fields, g_object_unref); + for (auto node = fields; node; node = node->next) { + auto field = GARROW_ARRAY(node->data); + g_ptr_array_add(priv->fields, g_object_ref(field)); + } + return GARROW_STRUCT_ARRAY(struct_array); +} + +static GPtrArray * 
+garrow_struct_array_get_fields_internal(GArrowStructArray *array) +{ + auto priv = GARROW_STRUCT_ARRAY_GET_PRIVATE(array); + if (!priv->fields) { + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_struct_array = + std::static_pointer_cast<arrow::StructArray>(arrow_array); + auto arrow_fields = arrow_struct_array->fields(); + priv->fields = g_ptr_array_sized_new(arrow_fields.size()); + g_ptr_array_set_free_func(priv->fields, g_object_unref); + for (auto &arrow_field : arrow_fields) { + g_ptr_array_add(priv->fields, garrow_array_new_raw(&arrow_field)); + } + } + return priv->fields; +} + +/** + * garrow_struct_array_get_field: + * @array: A #GArrowStructArray. + * @i: The index of the field in the struct. A negative index + * counts back from the last field. + * + * Returns: (nullable) (transfer full): The i-th field, or %NULL + * if @i is out of range. + */ +GArrowArray * +garrow_struct_array_get_field(GArrowStructArray *array, + gint i) +{ + auto fields = garrow_struct_array_get_fields_internal(array); + if (i < 0) { + i += fields->len; + } + if (i < 0) { + return NULL; + } + if (i >= static_cast<gint>(fields->len)) { + return NULL; + } + auto field = static_cast<GArrowArray *>(g_ptr_array_index(fields, i)); + g_object_ref(field); + return field; +} + +/** + * garrow_struct_array_get_fields: + * @array: A #GArrowStructArray. + * + * Returns: (element-type GArrowArray) (transfer full): + * The fields in the struct. + */ +GList * +garrow_struct_array_get_fields(GArrowStructArray *array) +{ + auto fields = garrow_struct_array_get_fields_internal(array); + + GList *field_list = NULL; + for (guint i = 0; i < fields->len; ++i) { + auto field = static_cast<GArrowArray *>(g_ptr_array_index(fields, i)); + field_list = g_list_prepend(field_list, g_object_ref(field)); + } + return g_list_reverse(field_list); +} + +/** + * garrow_struct_array_flatten: + * @array: A #GArrowStructArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (element-type GArrowArray) (transfer full): + * The fields in the struct.
+ * + * Since: 0.10.0 + */ +GList * +garrow_struct_array_flatten(GArrowStructArray *array, GError **error) +{ + const auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_struct_array = + std::static_pointer_cast<arrow::StructArray>(arrow_array); + + auto memory_pool = arrow::default_memory_pool(); + auto arrow_arrays = arrow_struct_array->Flatten(memory_pool); + if (!garrow::check(error, arrow_arrays, "[struct-array][flatten]")) { + return NULL; + } + + GList *arrays = NULL; + for (auto arrow_array : *arrow_arrays) { + auto array = garrow_array_new_raw(&arrow_array); + arrays = g_list_prepend(arrays, array); + } + + return g_list_reverse(arrays); +} + + +typedef struct GArrowMapArrayPrivate_ { + GArrowArray *offsets; + GArrowArray *keys; + GArrowArray *items; +} GArrowMapArrayPrivate; + +enum { + PROP_OFFSETS = 1, + PROP_KEYS, + PROP_ITEMS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowMapArray, + garrow_map_array, + GARROW_TYPE_LIST_ARRAY) + +#define GARROW_MAP_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowMapArrayPrivate *>( \ + garrow_map_array_get_instance_private( \ + GARROW_MAP_ARRAY(obj))) + +static void +garrow_map_array_dispose(GObject *object) +{ + auto priv = GARROW_MAP_ARRAY_GET_PRIVATE(object); + + if (priv->offsets) { + g_object_unref(priv->offsets); + priv->offsets = NULL; + } + + if (priv->keys) { + g_object_unref(priv->keys); + priv->keys = NULL; + } + + if (priv->items) { + g_object_unref(priv->items); + priv->items = NULL; + } + + G_OBJECT_CLASS(garrow_map_array_parent_class)->dispose(object); +} + +static void +garrow_map_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_MAP_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_OFFSETS: + priv->offsets = GARROW_ARRAY(g_value_dup_object(value)); + break; + case PROP_KEYS: + priv->keys = GARROW_ARRAY(g_value_dup_object(value)); + break; + case PROP_ITEMS: + priv->items = 
GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_map_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_MAP_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_OFFSETS: + g_value_set_object(value, priv->offsets); + break; + case PROP_KEYS: + g_value_set_object(value, priv->keys); + break; + case PROP_ITEMS: + g_value_set_object(value, priv->items); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_map_array_init(GArrowMapArray *object) +{ +} + +static void +garrow_map_array_class_init(GArrowMapArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_map_array_dispose; + gobject_class->set_property = garrow_map_array_set_property; + gobject_class->get_property = garrow_map_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("offsets", + "Offsets", + "The GArrowArray for offsets", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_OFFSETS, spec); + + spec = g_param_spec_object("keys", + "Keys", + "The GArrowArray for keys", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_KEYS, spec); + + spec = g_param_spec_object("items", + "Items", + "The GArrowArray for items", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ITEMS, spec); +} + +/** + * garrow_map_array_new: + * @offsets: The offsets Array containing n + 1 offsets encoding length and size. + * @keys: The Array containing key values. + * @items: The items Array containing item values. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowMapArray + * or %NULL on error. + * + * Since: 0.17.0 + */ +GArrowMapArray * +garrow_map_array_new(GArrowArray *offsets, + GArrowArray *keys, + GArrowArray *items, + GError **error) +{ + const auto arrow_offsets = garrow_array_get_raw(offsets); + const auto arrow_keys = garrow_array_get_raw(keys); + const auto arrow_items = garrow_array_get_raw(items); + auto arrow_memory_pool = arrow::default_memory_pool(); + auto arrow_array_result = arrow::MapArray::FromArrays(arrow_offsets, + arrow_keys, + arrow_items, + arrow_memory_pool); + if (garrow::check(error, arrow_array_result, "[map-array][new]")) { + auto arrow_array = *arrow_array_result; + return GARROW_MAP_ARRAY(garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "offsets", offsets, + "keys", keys, + "items", items, + NULL)); + } else { + return NULL; + } +} + +/** + * garrow_map_array_get_keys: + * @array: A #GArrowMapArray. + * + * Returns: (transfer full): The Array containing key values. + * + * Since: 0.17.0 + */ +GArrowArray * +garrow_map_array_get_keys(GArrowMapArray *array) +{ + auto priv = GARROW_MAP_ARRAY_GET_PRIVATE(array); + if (priv->keys) { + g_object_ref(priv->keys); + return priv->keys; + } + + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_map_array = + std::static_pointer_cast<arrow::MapArray>(arrow_array); + auto arrow_keys = arrow_map_array->keys(); + return garrow_array_new_raw(&arrow_keys); +} + +/** + * garrow_map_array_get_items: + * @array: A #GArrowMapArray. + * + * Returns: (transfer full): The items Array containing item values. 
+ * + * Since: 0.17.0 + */ +GArrowArray * +garrow_map_array_get_items(GArrowMapArray *array) +{ + auto priv = GARROW_MAP_ARRAY_GET_PRIVATE(array); + if (priv->items) { + g_object_ref(priv->items); + return priv->items; + } + + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_map_array = + std::static_pointer_cast<arrow::MapArray>(arrow_array); + auto arrow_items = arrow_map_array->items(); + return garrow_array_new_raw(&arrow_items); +} + + +typedef struct GArrowUnionArrayPrivate_ { + GArrowInt8Array *type_ids; + GPtrArray *fields; +} GArrowUnionArrayPrivate; + +enum { + PROP_TYPE_IDS = 1, + PROP_FIELDS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowUnionArray, + garrow_union_array, + GARROW_TYPE_ARRAY) + +#define GARROW_UNION_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowUnionArrayPrivate *>( \ + garrow_union_array_get_instance_private( \ + GARROW_UNION_ARRAY(obj))) + +static void +garrow_union_array_dispose(GObject *object) +{ + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(object); + + if (priv->type_ids) { + g_object_unref(priv->type_ids); + priv->type_ids = NULL; + } + + if (priv->fields) { + g_ptr_array_free(priv->fields, TRUE); + priv->fields = NULL; + } + + G_OBJECT_CLASS(garrow_union_array_parent_class)->dispose(object); +} + +static void +garrow_union_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TYPE_IDS: + priv->type_ids = GARROW_INT8_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_union_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TYPE_IDS: + g_value_set_object(value, priv->type_ids); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, 
pspec); + break; + } +} + +static void +garrow_union_array_init(GArrowUnionArray *object) +{ +} + +static void +garrow_union_array_class_init(GArrowUnionArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_union_array_dispose; + gobject_class->set_property = garrow_union_array_set_property; + gobject_class->get_property = garrow_union_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("type-ids", + "Type IDs", + "The GArrowInt8Array for type IDs", + GARROW_TYPE_INT8_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_TYPE_IDS, spec); +} + +/** + * garrow_union_array_get_field: + * @array: A #GArrowUnionArray. + * @i: The index of the field in the union. A negative index counts from the end. + * + * Returns: (nullable) (transfer full): The i-th field values as a + * #GArrowArray or %NULL on out of range. + */ +GArrowArray * +garrow_union_array_get_field(GArrowUnionArray *array, + gint i) +{ + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(array); + if (!priv->fields) { + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_union_array = + std::static_pointer_cast<arrow::UnionArray>(arrow_array); + auto n_fields = arrow_union_array->num_fields(); + priv->fields = g_ptr_array_sized_new(n_fields); + g_ptr_array_set_free_func(priv->fields, g_object_unref); + for (int i = 0; i < n_fields; ++i) { + auto arrow_field = arrow_union_array->field(i); + g_ptr_array_add(priv->fields, garrow_array_new_raw(&arrow_field)); + } + } + + if (i < 0) { + i += priv->fields->len; + } + if (i < 0) { + return NULL; + } + if (i >= static_cast<gint>(priv->fields->len)) { + return NULL; + } + auto field = static_cast<GArrowArray *>(g_ptr_array_index(priv->fields, i)); + g_object_ref(field); + return field; +} + + +G_DEFINE_TYPE(GArrowSparseUnionArray, + garrow_sparse_union_array, + GARROW_TYPE_UNION_ARRAY) + +static void 
+garrow_sparse_union_array_init(GArrowSparseUnionArray *object) +{ +} + +static void +garrow_sparse_union_array_class_init(GArrowSparseUnionArrayClass *klass) +{ +} + +static GArrowSparseUnionArray * +garrow_sparse_union_array_new_internal(GArrowSparseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GList *fields, + GError **error, + const char *context) +{ + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + arrow::Result<std::shared_ptr<arrow::Array>> arrow_sparse_union_array_result; + if (data_type) { + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(arrow_data_type); + std::vector<std::string> arrow_field_names; + for (const auto &arrow_field : arrow_union_data_type->fields()) { + arrow_field_names.push_back(arrow_field->name()); + } + arrow_sparse_union_array_result = + arrow::SparseUnionArray::Make(*arrow_type_ids, + arrow_fields, + arrow_field_names, + arrow_union_data_type->type_codes()); + } else { + arrow_sparse_union_array_result = + arrow::SparseUnionArray::Make(*arrow_type_ids, arrow_fields); + } + if (garrow::check(error, + arrow_sparse_union_array_result, + context)) { + auto arrow_sparse_union_array = *arrow_sparse_union_array_result; + auto sparse_union_array = + garrow_array_new_raw(&arrow_sparse_union_array, + "array", &arrow_sparse_union_array, + "value-data-type", data_type, + "type-ids", type_ids, + NULL); + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(sparse_union_array); + priv->fields = g_ptr_array_sized_new(arrow_fields.size()); + g_ptr_array_set_free_func(priv->fields, g_object_unref); + for (auto node = fields; node; node = node->next) { + auto field = GARROW_ARRAY(node->data); + 
g_ptr_array_add(priv->fields, g_object_ref(field)); + } + return GARROW_SPARSE_UNION_ARRAY(sparse_union_array); + } else { + return NULL; + } +} + +/** + * garrow_sparse_union_array_new: + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowSparseUnionArray + * or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowSparseUnionArray * +garrow_sparse_union_array_new(GArrowInt8Array *type_ids, + GList *fields, + GError **error) +{ + return garrow_sparse_union_array_new_internal(NULL, + type_ids, + fields, + error, + "[sparse-union-array][new]"); +} + +/** + * garrow_sparse_union_array_new_data_type: + * @data_type: The data type for the sparse array. + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowSparseUnionArray + * or %NULL on error. 
+ * + * Since: 0.14.0 + */ +GArrowSparseUnionArray * +garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GList *fields, + GError **error) +{ + return garrow_sparse_union_array_new_internal( + data_type, + type_ids, + fields, + error, + "[sparse-union-array][new][data-type]"); +} + + +typedef struct GArrowDenseUnionArrayPrivate_ { + GArrowInt32Array *value_offsets; +} GArrowDenseUnionArrayPrivate; + +enum { + PROP_VALUE_OFFSETS = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDenseUnionArray, + garrow_dense_union_array, + GARROW_TYPE_UNION_ARRAY) + +#define GARROW_DENSE_UNION_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowDenseUnionArrayPrivate *>( \ + garrow_dense_union_array_get_instance_private( \ + GARROW_DENSE_UNION_ARRAY(obj))) + +static void +garrow_dense_union_array_dispose(GObject *object) +{ + auto priv = GARROW_DENSE_UNION_ARRAY_GET_PRIVATE(object); + + if (priv->value_offsets) { + g_object_unref(priv->value_offsets); + priv->value_offsets = NULL; + } + + G_OBJECT_CLASS(garrow_dense_union_array_parent_class)->dispose(object); +} + +static void +garrow_dense_union_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DENSE_UNION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE_OFFSETS: + priv->value_offsets = GARROW_INT32_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dense_union_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DENSE_UNION_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE_OFFSETS: + g_value_set_object(value, priv->value_offsets); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dense_union_array_init(GArrowDenseUnionArray *object) +{ +} + +static 
void +garrow_dense_union_array_class_init(GArrowDenseUnionArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_dense_union_array_dispose; + gobject_class->set_property = garrow_dense_union_array_set_property; + gobject_class->get_property = garrow_dense_union_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value-offsets", + "Value offsets", + "The GArrowInt32Array for value offsets", + GARROW_TYPE_INT32_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE_OFFSETS, spec); +} + +static GArrowDenseUnionArray * +garrow_dense_union_array_new_internal(GArrowDenseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error, + const gchar *context) +{ + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + arrow::Result<std::shared_ptr<arrow::Array>> arrow_dense_union_array_result; + if (data_type) { + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(arrow_data_type); + std::vector<std::string> arrow_field_names; + for (const auto &arrow_field : arrow_union_data_type->fields()) { + arrow_field_names.push_back(arrow_field->name()); + } + arrow_dense_union_array_result = + arrow::DenseUnionArray::Make(*arrow_type_ids, + *arrow_value_offsets, + arrow_fields, + arrow_field_names, + arrow_union_data_type->type_codes()); + } else { + arrow_dense_union_array_result = + arrow::DenseUnionArray::Make(*arrow_type_ids, + *arrow_value_offsets, + 
arrow_fields); + } + if (garrow::check(error, + arrow_dense_union_array_result, + context)) { + auto arrow_dense_union_array = *arrow_dense_union_array_result; + auto dense_union_array = + garrow_array_new_raw(&arrow_dense_union_array, + "array", &arrow_dense_union_array, + "value-data-type", data_type, + "type-ids", type_ids, + "value-offsets", value_offsets, + NULL); + auto priv = GARROW_UNION_ARRAY_GET_PRIVATE(dense_union_array); + priv->fields = g_ptr_array_sized_new(arrow_fields.size()); + g_ptr_array_set_free_func(priv->fields, g_object_unref); + for (auto node = fields; node; node = node->next) { + auto field = GARROW_ARRAY(node->data); + g_ptr_array_add(priv->fields, g_object_ref(field)); + } + return GARROW_DENSE_UNION_ARRAY(dense_union_array); + } else { + return NULL; + } +} + +/** + * garrow_dense_union_array_new: + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @value_offsets: The value offsets for each value as #GArrowInt32Array. + * Each offset is counted for each type. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowDenseUnionArray + * or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowDenseUnionArray * +garrow_dense_union_array_new(GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error) +{ + return garrow_dense_union_array_new_internal(NULL, + type_ids, + value_offsets, + fields, + error, + "[dense-union-array][new]"); +} + +/** + * garrow_dense_union_array_new_data_type: + * @data_type: The data type for the dense union array. + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @value_offsets: The value offsets for each value as #GArrowInt32Array. + * Each offset is counted for each type. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowDenseUnionArray + * or %NULL on error. + * + * Since: 0.14.0 + */ +GArrowDenseUnionArray * +garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error) +{ + return garrow_dense_union_array_new_internal( + data_type, + type_ids, + value_offsets, + fields, + error, + "[dense-union-array][new][data-type]"); +} + + +typedef struct GArrowDictionaryArrayPrivate_ { + GArrowArray *indices; + GArrowArray *dictionary; +} GArrowDictionaryArrayPrivate; + +enum { + PROP_INDICES = 1, + PROP_DICTIONARY, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDictionaryArray, + garrow_dictionary_array, + GARROW_TYPE_ARRAY) + +#define GARROW_DICTIONARY_ARRAY_GET_PRIVATE(obj) \ + static_cast<GArrowDictionaryArrayPrivate *>( \ + garrow_dictionary_array_get_instance_private( \ + GARROW_DICTIONARY_ARRAY(obj))) + +static void +garrow_dictionary_array_dispose(GObject *object) +{ + auto priv = GARROW_DICTIONARY_ARRAY_GET_PRIVATE(object); + + if (priv->indices) { + g_object_unref(priv->indices); + priv->indices = NULL; + } + + if (priv->dictionary) { + g_object_unref(priv->dictionary); + priv->dictionary = NULL; + } + + G_OBJECT_CLASS(garrow_dictionary_array_parent_class)->dispose(object); +} + +static void +garrow_dictionary_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DICTIONARY_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INDICES: + priv->indices = GARROW_ARRAY(g_value_dup_object(value)); + break; + case PROP_DICTIONARY: + priv->dictionary = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dictionary_array_get_property(GObject *object, + guint prop_id, + GValue *value, + 
GParamSpec *pspec) +{ + auto priv = GARROW_DICTIONARY_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INDICES: + g_value_set_object(value, priv->indices); + break; + case PROP_DICTIONARY: + g_value_set_object(value, priv->dictionary); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dictionary_array_init(GArrowDictionaryArray *object) +{ +} + +static void +garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_dictionary_array_dispose; + gobject_class->set_property = garrow_dictionary_array_set_property; + gobject_class->get_property = garrow_dictionary_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("indices", + "The indices", + "The GArrowArray for indices", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_INDICES, spec); + + spec = g_param_spec_object("dictionary", + "The dictionary", + "The GArrowArray for dictionary", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DICTIONARY, spec); +} + +/** + * garrow_dictionary_array_new: + * @data_type: The data type of the dictionary array. + * @indices: The indices of values in dictionary. + * @dictionary: The dictionary of the dictionary array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowDictionaryArray + * or %NULL on error. 
+ * + * Since: 0.8.0 + */ +GArrowDictionaryArray * +garrow_dictionary_array_new(GArrowDataType *data_type, + GArrowArray *indices, + GArrowArray *dictionary, + GError **error) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + const auto arrow_indices = garrow_array_get_raw(indices); + const auto arrow_dictionary = garrow_array_get_raw(dictionary); + auto arrow_dictionary_array_result = + arrow::DictionaryArray::FromArrays( + arrow_data_type, + arrow_indices, + arrow_dictionary); + if (garrow::check(error, + arrow_dictionary_array_result, + "[dictionary-array][new]")) { + auto arrow_array = + std::static_pointer_cast<arrow::Array>(*arrow_dictionary_array_result); + auto dictionary_array = garrow_array_new_raw(&arrow_array, + "array", &arrow_array, + "value-data-type", data_type, + "indices", indices, + "dictionary", dictionary, + NULL); + return GARROW_DICTIONARY_ARRAY(dictionary_array); + } else { + return NULL; + } +} + +/** + * garrow_dictionary_array_get_indices: + * @array: A #GArrowDictionaryArray. + * + * Returns: (transfer full): The indices of values in dictionary. + * + * Since: 0.8.0 + */ +GArrowArray * +garrow_dictionary_array_get_indices(GArrowDictionaryArray *array) +{ + auto priv = GARROW_DICTIONARY_ARRAY_GET_PRIVATE(array); + if (priv->indices) { + g_object_ref(priv->indices); + return priv->indices; + } + + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_dictionary_array = + std::static_pointer_cast<arrow::DictionaryArray>(arrow_array); + auto arrow_indices = arrow_dictionary_array->indices(); + return garrow_array_new_raw(&arrow_indices); +} + +/** + * garrow_dictionary_array_get_dictionary: + * @array: A #GArrowDictionaryArray. + * + * Returns: (transfer full): The dictionary of this array. 
+ * + * Since: 0.8.0 + */ +GArrowArray * +garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array) +{ + auto priv = GARROW_DICTIONARY_ARRAY_GET_PRIVATE(array); + if (priv->dictionary) { + g_object_ref(priv->dictionary); + return priv->dictionary; + } + + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_dictionary_array = + std::static_pointer_cast<arrow::DictionaryArray>(arrow_array); + auto arrow_dictionary = arrow_dictionary_array->dictionary(); + return garrow_array_new_raw(&arrow_dictionary); +} + +/** + * garrow_dictionary_array_get_dictionary_data_type: + * @array: A #GArrowDictionaryArray. + * + * Returns: (transfer full): The dictionary data type of this array. + * + * Since: 0.8.0 + * + * Deprecated: 1.0.0: Use garrow_array_get_value_data_type() instead. + */ +GArrowDictionaryDataType * +garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array) +{ + auto data_type = garrow_array_get_value_data_type(GARROW_ARRAY(array)); + return GARROW_DICTIONARY_DATA_TYPE(data_type); +} + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/composite-array.h b/src/arrow/c_glib/arrow-glib/composite-array.h new file mode 100644 index 000000000..cfaeb4c76 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/composite-array.h @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/version.h> + +#include <arrow-glib/basic-array.h> +#include <arrow-glib/data-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_LIST_ARRAY (garrow_list_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowListArray, + garrow_list_array, + GARROW, + LIST_ARRAY, + GArrowArray) +struct _GArrowListArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowListArray *garrow_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer *value_offsets, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GArrowDataType *garrow_list_array_get_value_type(GArrowListArray *array); +GArrowArray *garrow_list_array_get_value(GArrowListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +GArrowArray *garrow_list_array_get_values(GArrowListArray *array); +GARROW_AVAILABLE_IN_2_0 +gint32 garrow_list_array_get_value_offset(GArrowListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +gint32 garrow_list_array_get_value_length(GArrowListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +const gint32 * +garrow_list_array_get_value_offsets(GArrowListArray *array, + gint64 *n_offsets); + + +#define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeListArray, + garrow_large_list_array, + GARROW, + LARGE_LIST_ARRAY, + GArrowArray) +struct _GArrowLargeListArrayClass +{ + GArrowArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_16 +GArrowLargeListArray *garrow_large_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowBuffer 
*value_offsets, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GARROW_AVAILABLE_IN_0_16 +GArrowDataType *garrow_large_list_array_get_value_type(GArrowLargeListArray *array); +GARROW_AVAILABLE_IN_0_16 +GArrowArray *garrow_large_list_array_get_value(GArrowLargeListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +GArrowArray *garrow_large_list_array_get_values(GArrowLargeListArray *array); +GARROW_AVAILABLE_IN_2_0 +gint64 garrow_large_list_array_get_value_offset(GArrowLargeListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +gint64 garrow_large_list_array_get_value_length(GArrowLargeListArray *array, + gint64 i); +GARROW_AVAILABLE_IN_2_0 +const gint64 * +garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, + gint64 *n_offsets); + + +#define GARROW_TYPE_STRUCT_ARRAY (garrow_struct_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStructArray, + garrow_struct_array, + GARROW, + STRUCT_ARRAY, + GArrowArray) +struct _GArrowStructArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowStructArray *garrow_struct_array_new(GArrowDataType *data_type, + gint64 length, + GList *fields, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array, + gint i); + +GList *garrow_struct_array_get_fields(GArrowStructArray *array); + +GARROW_AVAILABLE_IN_0_10 +GList *garrow_struct_array_flatten(GArrowStructArray *array, GError **error); + + +#define GARROW_TYPE_MAP_ARRAY (garrow_map_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMapArray, + garrow_map_array, + GARROW, + MAP_ARRAY, + GArrowListArray) +struct _GArrowMapArrayClass +{ + GArrowListArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowMapArray * +garrow_map_array_new(GArrowArray *offsets, + GArrowArray *keys, + GArrowArray *items, + GError **error); +GARROW_AVAILABLE_IN_0_17 +GArrowArray * +garrow_map_array_get_keys(GArrowMapArray *array); +GARROW_AVAILABLE_IN_0_17 +GArrowArray * 
+garrow_map_array_get_items(GArrowMapArray *array); + + +#define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUnionArray, + garrow_union_array, + GARROW, + UNION_ARRAY, + GArrowArray) +struct _GArrowUnionArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowArray * +garrow_union_array_get_field(GArrowUnionArray *array, + gint i); + +#define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray, + garrow_sparse_union_array, + GARROW, + SPARSE_UNION_ARRAY, + GArrowUnionArray) +struct _GArrowSparseUnionArrayClass +{ + GArrowUnionArrayClass parent_class; +}; + +GArrowSparseUnionArray * +garrow_sparse_union_array_new(GArrowInt8Array *type_ids, + GList *fields, + GError **error); +GArrowSparseUnionArray * +garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GList *fields, + GError **error); + + +#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray, + garrow_dense_union_array, + GARROW, + DENSE_UNION_ARRAY, + GArrowUnionArray) +struct _GArrowDenseUnionArrayClass +{ + GArrowUnionArrayClass parent_class; +}; + +GArrowDenseUnionArray * +garrow_dense_union_array_new(GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error); +GArrowDenseUnionArray * +garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error); + + +#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray, + garrow_dictionary_array, + GARROW, + DICTIONARY_ARRAY, + GArrowArray) +struct _GArrowDictionaryArrayClass +{ + GArrowArrayClass parent_class; +}; + +GArrowDictionaryArray * +garrow_dictionary_array_new(GArrowDataType *data_type, + GArrowArray *indices, + 
GArrowArray *dictionary, + GError **error); +GArrowArray * +garrow_dictionary_array_get_indices(GArrowDictionaryArray *array); +GArrowArray * +garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_1_0_FOR(garrow_array_get_value_data_type) +GArrowDictionaryDataType * +garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array); +#endif + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/composite-data-type.cpp b/src/arrow/c_glib/arrow-glib/composite-data-type.cpp new file mode 100644 index 000000000..fadcafe6b --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/composite-data-type.cpp @@ -0,0 +1,720 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/enums.h> +#include <arrow-glib/error.hpp> +#include <arrow-glib/field.hpp> +#include <arrow-glib/type.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: composite-data-type + * @section_id: composite-data-type-classes + * @title: Composite data type classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowListDataType is a class for list data type. 
+ * + * #GArrowLargeListDataType is a class for 64-bit offsets list data type. + * + * #GArrowStructDataType is a class for struct data type. + * + * #GArrowMapDataType is a class for map data type. + * + * #GArrowUnionDataType is a base class for union data types. + * + * #GArrowSparseUnionDataType is a class for sparse union data type. + * + * #GArrowDenseUnionDataType is a class for dense union data type. + * + * #GArrowDictionaryDataType is a class for dictionary data type. + */ + +G_DEFINE_TYPE(GArrowListDataType, + garrow_list_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_list_data_type_init(GArrowListDataType *object) +{ +} + +static void +garrow_list_data_type_class_init(GArrowListDataTypeClass *klass) +{ +} + +/** + * garrow_list_data_type_new: + * @field: The field of elements + * + * Returns: The newly created list data type. + */ +GArrowListDataType * +garrow_list_data_type_new(GArrowField *field) +{ + auto arrow_field = garrow_field_get_raw(field); + auto arrow_data_type = + std::make_shared<arrow::ListType>(arrow_field); + + GArrowListDataType *data_type = + GARROW_LIST_DATA_TYPE(g_object_new(GARROW_TYPE_LIST_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + +/** + * garrow_list_data_type_get_value_field: + * @list_data_type: A #GArrowListDataType. + * + * Returns: (transfer full): The field of value. + * + * Deprecated: 0.13.0: + * Use garrow_list_data_type_get_field() instead. + */ +GArrowField * +garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type) +{ + return garrow_list_data_type_get_field(list_data_type); +} + +/** + * garrow_list_data_type_get_field: + * @list_data_type: A #GArrowListDataType. + * + * Returns: (transfer full): The field of value. 
+ * + * Since: 0.13.0 + */ +GArrowField * +garrow_list_data_type_get_field(GArrowListDataType *list_data_type) +{ + auto data_type = GARROW_DATA_TYPE(list_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_list_data_type = + static_cast<arrow::ListType *>(arrow_data_type.get()); + + auto arrow_field = arrow_list_data_type->value_field(); + return garrow_field_new_raw(&arrow_field, nullptr); +} + + +G_DEFINE_TYPE(GArrowLargeListDataType, + garrow_large_list_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_large_list_data_type_init(GArrowLargeListDataType *object) +{ +} + +static void +garrow_large_list_data_type_class_init(GArrowLargeListDataTypeClass *klass) +{ +} + +/** + * garrow_large_list_data_type_new: + * @field: The field of elements + * + * Returns: The newly created large list data type. + * + * Since: 0.16.0 + */ +GArrowLargeListDataType * +garrow_large_list_data_type_new(GArrowField *field) +{ + auto arrow_field = garrow_field_get_raw(field); + auto arrow_data_type = + std::make_shared<arrow::LargeListType>(arrow_field); + + GArrowLargeListDataType *data_type = + GARROW_LARGE_LIST_DATA_TYPE(g_object_new(GARROW_TYPE_LARGE_LIST_DATA_TYPE, + "data-type", &arrow_data_type, + NULL)); + return data_type; +} + +/** + * garrow_large_list_data_type_get_field: + * @large_list_data_type: A #GArrowLargeListDataType. + * + * Returns: (transfer full): The field of value. 
+ * + * Since: 0.16.0 + */ +GArrowField * +garrow_large_list_data_type_get_field(GArrowLargeListDataType *large_list_data_type) +{ + auto data_type = GARROW_DATA_TYPE(large_list_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_large_list_data_type = + static_cast<arrow::LargeListType *>(arrow_data_type.get()); + + auto arrow_field = arrow_large_list_data_type->value_field(); + return garrow_field_new_raw(&arrow_field, nullptr); +} + + +G_DEFINE_TYPE(GArrowStructDataType, + garrow_struct_data_type, + GARROW_TYPE_DATA_TYPE) + +static void +garrow_struct_data_type_init(GArrowStructDataType *object) +{ +} + +static void +garrow_struct_data_type_class_init(GArrowStructDataTypeClass *klass) +{ +} + +/** + * garrow_struct_data_type_new: + * @fields: (element-type GArrowField): The fields of the struct. + * + * Returns: The newly created struct data type. + */ +GArrowStructDataType * +garrow_struct_data_type_new(GList *fields) +{ + std::vector<std::shared_ptr<arrow::Field>> arrow_fields; + for (auto *node = fields; node; node = g_list_next(node)) { + auto field = GARROW_FIELD(node->data); + auto arrow_field = garrow_field_get_raw(field); + arrow_fields.push_back(arrow_field); + } + + auto arrow_data_type = std::make_shared<arrow::StructType>(arrow_fields); + auto data_type = g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE, + "data-type", &arrow_data_type, + NULL); + return GARROW_STRUCT_DATA_TYPE(data_type); +} + +/** + * garrow_struct_data_type_get_n_fields: + * @struct_data_type: A #GArrowStructDataType. + * + * Returns: The number of fields of the struct data type. + * + * Since: 0.12.0 + */ +gint +garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type)); + return arrow_data_type->num_fields(); +} + +/** + * garrow_struct_data_type_get_fields: + * @struct_data_type: A #GArrowStructDataType. 
+ *
+ * Returns: (transfer full) (element-type GArrowField):
+ *   The fields of the struct data type.
+ *
+ * Since: 0.12.0
+ */
+GList *
+garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type)
+{
+  auto arrow_data_type =
+    garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type));
+  auto arrow_fields = arrow_data_type->fields();
+
+  /* Prepend one wrapper per field, then flip the list once at the end
+   * so the result is in field order. */
+  GList *reversed_fields = NULL;
+  for (auto it = arrow_fields.begin(); it != arrow_fields.end(); ++it) {
+    auto arrow_field = *it;
+    reversed_fields =
+      g_list_prepend(reversed_fields,
+                     garrow_field_new_raw(&arrow_field, nullptr));
+  }
+  return g_list_reverse(reversed_fields);
+}
+
+/**
+ * garrow_struct_data_type_get_field:
+ * @struct_data_type: A #GArrowStructDataType.
+ * @i: The index of the target field.
+ *
+ * Returns: (transfer full) (nullable):
+ *   The field at the index in the struct data type or %NULL on not found.
+ *
+ * Since: 0.12.0
+ */
+GArrowField *
+garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type,
+                                  gint i)
+{
+  auto arrow_data_type =
+    garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type));
+
+  /* A negative index counts back from the end of the field list. */
+  const auto n_fields = arrow_data_type->num_fields();
+  const auto index = (i < 0) ? i + n_fields : i;
+  if (index < 0 || index >= n_fields) {
+    return NULL;
+  }
+
+  auto arrow_field = arrow_data_type->field(index);
+  if (!arrow_field) {
+    return NULL;
+  }
+  return garrow_field_new_raw(&arrow_field, nullptr);
+}
+
+/**
+ * garrow_struct_data_type_get_field_by_name:
+ * @struct_data_type: A #GArrowStructDataType.
+ * @name: The name of the target field.
+ *
+ * Returns: (transfer full) (nullable):
+ *   The field that has the name in the struct data type or %NULL on not found.
+ *
+ * Since: 0.12.0
+ */
+GArrowField *
+garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type,
+                                          const gchar *name)
+{
+  auto arrow_data_type =
+    garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type));
+  auto arrow_struct_type =
+    std::static_pointer_cast<arrow::StructType>(arrow_data_type);
+
+  /* An empty shared pointer means no field has this name. */
+  auto arrow_field = arrow_struct_type->GetFieldByName(name);
+  if (!arrow_field) {
+    return NULL;
+  }
+  return garrow_field_new_raw(&arrow_field, nullptr);
+}
+
+/**
+ * garrow_struct_data_type_get_field_index:
+ * @struct_data_type: A #GArrowStructDataType.
+ * @name: The name of the target field.
+ *
+ * Returns: The index of the target field in the struct data type
+ *   or `-1` on not found.
+ *
+ * Since: 0.12.0
+ */
+gint
+garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type,
+                                        const gchar *name)
+{
+  auto data_type = GARROW_DATA_TYPE(struct_data_type);
+  auto arrow_struct_type =
+    std::static_pointer_cast<arrow::StructType>(
+      garrow_data_type_get_raw(data_type));
+
+  return arrow_struct_type->GetFieldIndex(name);
+}
+
+
+G_DEFINE_TYPE(GArrowMapDataType,
+              garrow_map_data_type,
+              GARROW_TYPE_LIST_DATA_TYPE)
+
+static void
+garrow_map_data_type_init(GArrowMapDataType *object)
+{
+}
+
+static void
+garrow_map_data_type_class_init(GArrowMapDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_map_data_type_new:
+ * @key_type: The key type of the map.
+ * @item_type: The item type of the map.
+ *
+ * Returns: The newly created map data type.
+ *
+ * Since: 0.17.0
+ */
+GArrowMapDataType *
+garrow_map_data_type_new(GArrowDataType *key_type,
+                         GArrowDataType *item_type)
+{
+  auto arrow_key_type = garrow_data_type_get_raw(key_type);
+  auto arrow_item_type = garrow_data_type_get_raw(item_type);
+  /* Hold the new type as the base std::shared_ptr<arrow::DataType>:
+   * "data-type" is passed as an untyped pointer, so the pointee type
+   * should be the base shared pointer rather than
+   * std::shared_ptr<arrow::MapType>.
+   * NOTE(review): property setter not visible here -- confirm it reads
+   * a std::shared_ptr<arrow::DataType> *. */
+  std::shared_ptr<arrow::DataType> arrow_data_type =
+    std::make_shared<arrow::MapType>(arrow_key_type, arrow_item_type);
+  auto data_type = g_object_new(GARROW_TYPE_MAP_DATA_TYPE,
+                                "data-type", &arrow_data_type,
+                                NULL);
+  return GARROW_MAP_DATA_TYPE(data_type);
+}
+
+/**
+ * garrow_map_data_type_get_key_type:
+ * @map_data_type: A #GArrowMapDataType.
+ *
+ * Returns: (transfer full): The key type of the map.
+ *
+ * Since: 0.17.0
+ */
+GArrowDataType *
+garrow_map_data_type_get_key_type(GArrowMapDataType *map_data_type)
+{
+  auto data_type = GARROW_DATA_TYPE(map_data_type);
+  auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  auto arrow_map_data_type =
+    std::static_pointer_cast<arrow::MapType>(arrow_data_type);
+  /* garrow_data_type_new_raw() is given the address of the shared
+   * pointer, so the key type is kept in a named local. */
+  auto arrow_key_type = arrow_map_data_type->key_type();
+  return garrow_data_type_new_raw(&arrow_key_type);
+}
+
+/**
+ * garrow_map_data_type_get_item_type:
+ * @map_data_type: A #GArrowMapDataType.
+ *
+ * Returns: (transfer full): The item type of the map.
+ *
+ * Since: 0.17.0
+ */
+GArrowDataType *
+garrow_map_data_type_get_item_type(GArrowMapDataType *map_data_type)
+{
+  auto arrow_data_type =
+    garrow_data_type_get_raw(GARROW_DATA_TYPE(map_data_type));
+  auto arrow_map_type =
+    std::static_pointer_cast<arrow::MapType>(arrow_data_type);
+
+  /* Named local: its address is what garrow_data_type_new_raw() takes. */
+  auto arrow_item_type = arrow_map_type->item_type();
+  return garrow_data_type_new_raw(&arrow_item_type);
+}
+
+
+G_DEFINE_ABSTRACT_TYPE(GArrowUnionDataType,
+                       garrow_union_data_type,
+                       GARROW_TYPE_DATA_TYPE)
+
+static void
+garrow_union_data_type_init(GArrowUnionDataType *object)
+{
+}
+
+static void
+garrow_union_data_type_class_init(GArrowUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_union_data_type_get_n_fields:
+ * @union_data_type: A #GArrowUnionDataType.
+ *
+ * Returns: The number of fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type)
+{
+  auto data_type = GARROW_DATA_TYPE(union_data_type);
+  return garrow_data_type_get_raw(data_type)->num_fields();
+}
+
+/**
+ * garrow_union_data_type_get_fields:
+ * @union_data_type: A #GArrowUnionDataType.
+ *
+ * Returns: (transfer full) (element-type GArrowField):
+ *   The fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type)
+{
+  auto arrow_data_type =
+    garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type));
+  auto arrow_fields = arrow_data_type->fields();
+
+  /* Prepend one wrapper per field, then flip the list once at the end
+   * so the result is in field order. */
+  GList *reversed_fields = NULL;
+  for (auto it = arrow_fields.begin(); it != arrow_fields.end(); ++it) {
+    auto arrow_field = *it;
+    reversed_fields =
+      g_list_prepend(reversed_fields,
+                     garrow_field_new_raw(&arrow_field, nullptr));
+  }
+  return g_list_reverse(reversed_fields);
+}
+
+/**
+ * garrow_union_data_type_get_field:
+ * @union_data_type: A #GArrowUnionDataType.
+ * @i: The index of the target field.
+ *
+ * Returns: (transfer full) (nullable):
+ *   The field at the index in the union data type or %NULL on not found.
+ *
+ * Since: 0.12.0
+ */
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type,
+                                 gint i)
+{
+  auto data_type = GARROW_DATA_TYPE(union_data_type);
+  auto arrow_data_type = garrow_data_type_get_raw(data_type);
+
+  /* A negative index counts back from the end of the field list. */
+  if (i < 0) {
+    i += arrow_data_type->num_fields();
+  }
+  if (i < 0) {
+    return NULL;
+  }
+  if (i >= arrow_data_type->num_fields()) {
+    return NULL;
+  }
+
+  auto arrow_field = arrow_data_type->field(i);
+  if (arrow_field) {
+    return garrow_field_new_raw(&arrow_field, nullptr);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_union_data_type_get_type_codes:
+ * @union_data_type: A #GArrowUnionDataType.
+ * @n_type_codes: (out): The number of type codes.
+ *
+ * Returns: (transfer full) (array length=n_type_codes):
+ *   The codes for each field.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 0.12.0
+ */
+gint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type,
+                                      gsize *n_type_codes)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type));
+  auto arrow_union_data_type =
+    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+
+  /* Bind by reference instead of copying the vector: the codes are only
+   * read here, while arrow_union_data_type keeps the type alive. */
+  const auto &arrow_type_codes = arrow_union_data_type->type_codes();
+  const auto n = arrow_type_codes.size();
+  auto type_codes = g_new(gint8, n);
+  for (size_t i = 0; i < n; ++i) {
+    type_codes[i] = arrow_type_codes[i];
+  }
+  /* Do not write through a NULL out pointer when a C caller does not
+   * ask for the count. */
+  if (n_type_codes) {
+    *n_type_codes = n;
+  }
+  return type_codes;
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionDataType,
+              garrow_sparse_union_data_type,
+              GARROW_TYPE_UNION_DATA_TYPE)
+
+static void
+garrow_sparse_union_data_type_init(GArrowSparseUnionDataType *object)
+{
+}
+
+static void
+garrow_sparse_union_data_type_class_init(GArrowSparseUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_data_type_new:
+ * @fields: (element-type GArrowField): The fields of
the union. + * @type_codes: (array length=n_type_codes): The codes to specify each field. + * @n_type_codes: The number of type codes. + * + * Returns: The newly created sparse union data type. + */ +GArrowSparseUnionDataType * +garrow_sparse_union_data_type_new(GList *fields, + gint8 *type_codes, + gsize n_type_codes) +{ + std::vector<std::shared_ptr<arrow::Field>> arrow_fields; + for (auto node = fields; node; node = g_list_next(node)) { + auto field = GARROW_FIELD(node->data); + auto arrow_field = garrow_field_get_raw(field); + arrow_fields.push_back(arrow_field); + } + + std::vector<int8_t> arrow_type_codes; + for (gsize i = 0; i < n_type_codes; ++i) { + arrow_type_codes.push_back(type_codes[i]); + } + + auto arrow_data_type = + std::make_shared<arrow::SparseUnionType>(arrow_fields, + arrow_type_codes); + auto data_type = g_object_new(GARROW_TYPE_SPARSE_UNION_DATA_TYPE, + "data-type", &arrow_data_type, + NULL); + return GARROW_SPARSE_UNION_DATA_TYPE(data_type); +} + + +G_DEFINE_TYPE(GArrowDenseUnionDataType, + garrow_dense_union_data_type, + GARROW_TYPE_UNION_DATA_TYPE) + +static void +garrow_dense_union_data_type_init(GArrowDenseUnionDataType *object) +{ +} + +static void +garrow_dense_union_data_type_class_init(GArrowDenseUnionDataTypeClass *klass) +{ +} + +/** + * garrow_dense_union_data_type_new: + * @fields: (element-type GArrowField): The fields of the union. + * @type_codes: (array length=n_type_codes): The codes to specify each field. + * @n_type_codes: The number of type codes. + * + * Returns: The newly created dense union data type. 
+ */
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+                                 gint8 *type_codes,
+                                 gsize n_type_codes)
+{
+  /* Collect the raw Arrow field of every #GArrowField in list order. */
+  std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
+  for (auto node = fields; node; node = g_list_next(node)) {
+    auto field = GARROW_FIELD(node->data);
+    arrow_fields.push_back(garrow_field_get_raw(field));
+  }
+
+  /* The final size is known, so allocate the code vector once. */
+  std::vector<int8_t> arrow_type_codes;
+  arrow_type_codes.reserve(n_type_codes);
+  for (gsize i = 0; i < n_type_codes; ++i) {
+    arrow_type_codes.push_back(type_codes[i]);
+  }
+
+  /* Hold the new type as the base std::shared_ptr<arrow::DataType>:
+   * "data-type" is passed as an untyped pointer, so the pointee type
+   * should be the base shared pointer rather than
+   * std::shared_ptr<arrow::DenseUnionType>.
+   * NOTE(review): property setter not visible here -- confirm it reads
+   * a std::shared_ptr<arrow::DataType> *. */
+  std::shared_ptr<arrow::DataType> arrow_data_type =
+    std::make_shared<arrow::DenseUnionType>(arrow_fields,
+                                            arrow_type_codes);
+  auto data_type = g_object_new(GARROW_TYPE_DENSE_UNION_DATA_TYPE,
+                                "data-type", &arrow_data_type,
+                                NULL);
+  return GARROW_DENSE_UNION_DATA_TYPE(data_type);
+}
+
+
+G_DEFINE_TYPE(GArrowDictionaryDataType,
+              garrow_dictionary_data_type,
+              GARROW_TYPE_FIXED_WIDTH_DATA_TYPE)
+
+static void
+garrow_dictionary_data_type_init(GArrowDictionaryDataType *object)
+{
+}
+
+static void
+garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_dictionary_data_type_new:
+ * @index_data_type: The data type of index.
+ * @value_data_type: The data type of dictionary values.
+ * @ordered: Whether dictionary contents are ordered or not.
+ *
+ * Returns: The newly created dictionary data type.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryDataType *
+garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
+                                GArrowDataType *value_data_type,
+                                gboolean ordered)
+{
+  auto arrow_index_data_type = garrow_data_type_get_raw(index_data_type);
+  auto arrow_value_data_type = garrow_data_type_get_raw(value_data_type);
+  /* Built through arrow::dictionary() and wrapped with the generic
+   * garrow_data_type_new_raw(), unlike the g_object_new() constructors
+   * of the other composite types in this file. */
+  auto arrow_data_type = arrow::dictionary(arrow_index_data_type,
+                                           arrow_value_data_type,
+                                           ordered);
+  return GARROW_DICTIONARY_DATA_TYPE(garrow_data_type_new_raw(&arrow_data_type));
+}
+
+/**
+ * garrow_dictionary_data_type_get_index_data_type:
+ * @dictionary_data_type: The #GArrowDictionaryDataType.
+ *
+ * Returns: (transfer full): The #GArrowDataType of index.
+ *
+ * Since: 0.8.0
+ */
+GArrowDataType *
+garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type)
+{
+  auto data_type = GARROW_DATA_TYPE(dictionary_data_type);
+  auto arrow_dictionary_type =
+    std::static_pointer_cast<arrow::DictionaryType>(
+      garrow_data_type_get_raw(data_type));
+
+  /* Named local: its address is what garrow_data_type_new_raw() takes. */
+  auto arrow_index_type = arrow_dictionary_type->index_type();
+  return garrow_data_type_new_raw(&arrow_index_type);
+}
+
+/**
+ * garrow_dictionary_data_type_get_value_data_type:
+ * @dictionary_data_type: The #GArrowDictionaryDataType.
+ *
+ * Returns: (transfer full): The #GArrowDataType of dictionary values.
+ *
+ * Since: 0.14.0
+ */
+GArrowDataType *
+garrow_dictionary_data_type_get_value_data_type(GArrowDictionaryDataType *dictionary_data_type)
+{
+  auto data_type = GARROW_DATA_TYPE(dictionary_data_type);
+  auto arrow_dictionary_type =
+    std::static_pointer_cast<arrow::DictionaryType>(
+      garrow_data_type_get_raw(data_type));
+
+  /* Named local: its address is what garrow_data_type_new_raw() takes. */
+  auto arrow_value_type = arrow_dictionary_type->value_type();
+  return garrow_data_type_new_raw(&arrow_value_type);
+}
+
+/**
+ * garrow_dictionary_data_type_is_ordered:
+ * @dictionary_data_type: The #GArrowDictionaryDataType.
+ *
+ * Returns: Whether dictionary contents are ordered or not.
+ * + * Since: 0.8.0 + */ +gboolean +garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); + auto arrow_dictionary_data_type = + std::static_pointer_cast<arrow::DictionaryType>(arrow_data_type); + return arrow_dictionary_data_type->ordered(); +} + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/composite-data-type.h b/src/arrow/c_glib/arrow-glib/composite-data-type.h new file mode 100644 index 000000000..443347b50 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/composite-data-type.h @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#pragma once
+
+#include <arrow-glib/basic-array.h>
+#include <arrow-glib/basic-data-type.h>
+#include <arrow-glib/field.h>
+#include <arrow-glib/version.h>
+
+G_BEGIN_DECLS
+
+/* List data type: derivable GObject type whose parent is GArrowDataType. */
+#define GARROW_TYPE_LIST_DATA_TYPE (garrow_list_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowListDataType,
+                         garrow_list_data_type,
+                         GARROW,
+                         LIST_DATA_TYPE,
+                         GArrowDataType)
+struct _GArrowListDataTypeClass
+{
+  GArrowDataTypeClass parent_class;
+};
+
+GArrowListDataType *garrow_list_data_type_new (GArrowField *field);
+/* get_value_field() is the deprecated spelling; it is hidden when
+ * GARROW_DISABLE_DEPRECATED is defined. Use get_field() instead. */
+#ifndef GARROW_DISABLE_DEPRECATED
+GARROW_DEPRECATED_IN_0_13_FOR(garrow_list_data_type_get_field)
+GArrowField *garrow_list_data_type_get_value_field (GArrowListDataType *list_data_type);
+#endif
+GARROW_AVAILABLE_IN_0_13
+GArrowField *garrow_list_data_type_get_field (GArrowListDataType *list_data_type);
+
+
+/* Large list data type: derivable GObject type whose parent is
+ * GArrowDataType. */
+#define GARROW_TYPE_LARGE_LIST_DATA_TYPE (garrow_large_list_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowLargeListDataType,
+                         garrow_large_list_data_type,
+                         GARROW,
+                         LARGE_LIST_DATA_TYPE,
+                         GArrowDataType)
+struct _GArrowLargeListDataTypeClass
+{
+  GArrowDataTypeClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_0_16
+GArrowLargeListDataType *garrow_large_list_data_type_new(GArrowField *field);
+GARROW_AVAILABLE_IN_0_16
+GArrowField *garrow_large_list_data_type_get_field(GArrowLargeListDataType *large_list_data_type);
+
+
+/* Struct data type: derivable GObject type whose parent is
+ * GArrowDataType. */
+#define GARROW_TYPE_STRUCT_DATA_TYPE (garrow_struct_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowStructDataType,
+                         garrow_struct_data_type,
+                         GARROW,
+                         STRUCT_DATA_TYPE,
+                         GArrowDataType)
+struct _GArrowStructDataTypeClass
+{
+  GArrowDataTypeClass parent_class;
+};
+
+GArrowStructDataType *garrow_struct_data_type_new (GList *fields);
+gint
+garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type);
+GList *
+garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type);
+GArrowField *
+garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type,
+                                  gint i);
+GArrowField *
+garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type,
+                                          const gchar *name);
+gint
+garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type,
+                                        const gchar *name);
+
+
+/* Map data type: derivable GObject type whose parent is
+ * GArrowListDataType (not GArrowDataType directly). */
+#define GARROW_TYPE_MAP_DATA_TYPE (garrow_map_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowMapDataType,
+                         garrow_map_data_type,
+                         GARROW,
+                         MAP_DATA_TYPE,
+                         GArrowListDataType)
+struct _GArrowMapDataTypeClass
+{
+  GArrowListDataTypeClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_0_17
+GArrowMapDataType *
+garrow_map_data_type_new(GArrowDataType *key_type,
+                         GArrowDataType *item_type);
+GARROW_AVAILABLE_IN_0_17
+GArrowDataType *
+garrow_map_data_type_get_key_type(GArrowMapDataType *map_data_type);
+GARROW_AVAILABLE_IN_0_17
+GArrowDataType *
+garrow_map_data_type_get_item_type(GArrowMapDataType *map_data_type);
+
+
+/* Union data type: parent of the sparse and dense union types declared
+ * below; no constructor is declared for it in this header. */
+#define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionDataType,
+                         garrow_union_data_type,
+                         GARROW,
+                         UNION_DATA_TYPE,
+                         GArrowDataType)
+struct _GArrowUnionDataTypeClass
+{
+  GArrowDataTypeClass parent_class;
+};
+
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type);
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type);
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type,
+                                 gint i);
+gint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type,
+                                      gsize *n_type_codes);
+
+
+/* Sparse union data type: derivable GObject type whose parent is
+ * GArrowUnionDataType. */
+#define GARROW_TYPE_SPARSE_UNION_DATA_TYPE \
+  (garrow_sparse_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType,
+                         garrow_sparse_union_data_type,
+                         GARROW,
+                         SPARSE_UNION_DATA_TYPE,
+                         GArrowUnionDataType)
+struct _GArrowSparseUnionDataTypeClass
+{
+  GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowSparseUnionDataType *
+garrow_sparse_union_data_type_new(GList *fields,
+                                  gint8 *type_codes,
+                                  gsize n_type_codes);
+
+
+/* Dense union data type: derivable GObject type whose parent is
+ * GArrowUnionDataType. */
+#define GARROW_TYPE_DENSE_UNION_DATA_TYPE \
+
(garrow_dense_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType,
+                         garrow_dense_union_data_type,
+                         GARROW,
+                         DENSE_UNION_DATA_TYPE,
+                         GArrowUnionDataType)
+struct _GArrowDenseUnionDataTypeClass
+{
+  GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+                                 gint8 *type_codes,
+                                 gsize n_type_codes);
+
+
+/* Dictionary data type: derivable GObject type whose parent is
+ * GArrowFixedWidthDataType. */
+#define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType,
+                         garrow_dictionary_data_type,
+                         GARROW,
+                         DICTIONARY_DATA_TYPE,
+                         GArrowFixedWidthDataType)
+struct _GArrowDictionaryDataTypeClass
+{
+  GArrowFixedWidthDataTypeClass parent_class;
+};
+
+GArrowDictionaryDataType *
+garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
+                                GArrowDataType *value_data_type,
+                                gboolean ordered);
+GArrowDataType *
+garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type);
+GARROW_AVAILABLE_IN_0_14
+GArrowDataType *
+garrow_dictionary_data_type_get_value_data_type(GArrowDictionaryDataType *dictionary_data_type);
+gboolean
+garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type);
+
+
+G_END_DECLS
diff --git a/src/arrow/c_glib/arrow-glib/compute.cpp b/src/arrow/c_glib/arrow-glib/compute.cpp
new file mode 100644
index 000000000..cdfc96a5a
--- /dev/null
+++ b/src/arrow/c_glib/arrow-glib/compute.cpp
@@ -0,0 +1,4258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <sstream> + +#include <arrow-glib/array.hpp> +#include <arrow-glib/compute.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/datum.hpp> +#include <arrow-glib/enums.h> +#include <arrow-glib/error.hpp> +#include <arrow-glib/reader.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table.hpp> + +#include <arrow/compute/exec/exec_plan.h> +#include <arrow/compute/exec/options.h> + +template <typename ArrowType, typename GArrowArrayType> +typename ArrowType::c_type +garrow_numeric_array_sum(GArrowArrayType array, + GError **error, + const gchar *tag, + typename ArrowType::c_type default_value) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_sum_datum = arrow::compute::Sum(arrow_array); + if (garrow::check(error, arrow_sum_datum, tag)) { + using ScalarType = typename arrow::TypeTraits<ArrowType>::ScalarType; + auto arrow_numeric_scalar = + std::dynamic_pointer_cast<ScalarType>((*arrow_sum_datum).scalar()); + if (arrow_numeric_scalar->is_valid) { + return arrow_numeric_scalar->value; + } else { + return default_value; + } + } else { + return default_value; + } +} + +template <typename GArrowTypeNewRaw> +auto +garrow_take(arrow::Datum arrow_values, + arrow::Datum arrow_indices, + GArrowTakeOptions *options, + GArrowTypeNewRaw garrow_type_new_raw, + GError **error, + const gchar *tag) -> decltype(garrow_type_new_raw(arrow::Datum())) +{ + arrow::Result<arrow::Datum> arrow_taken_datum; + if (options) { + auto 
arrow_options = garrow_take_options_get_raw(options); + arrow_taken_datum = arrow::compute::Take(arrow_values, + arrow_indices, + *arrow_options); + } else { + arrow_taken_datum = arrow::compute::Take(arrow_values, + arrow_indices); + } + if (garrow::check(error, arrow_taken_datum, tag)) { + return garrow_type_new_raw(*arrow_taken_datum); + } else { + return NULL; + } +} + +namespace { + gboolean + garrow_field_refs_add(std::vector<arrow::FieldRef> &arrow_field_refs, + const gchar *string, + GError **error, + const gchar *tag) + { + if (string[0] == '.' || string[0] == '[') { + auto arrow_field_ref_result = arrow::FieldRef::FromDotPath(string); + if (!garrow::check(error, arrow_field_ref_result, tag)) { + return false; + } + arrow_field_refs.push_back(std::move(*arrow_field_ref_result)); + } else { + arrow_field_refs.emplace_back(string); + } + return true; + } + + bool + garrow_sort_key_equal_raw(const arrow::compute::SortKey &sort_key, + const arrow::compute::SortKey &other_sort_key) { + return + (sort_key.name == other_sort_key.name) && + (sort_key.order == other_sort_key.order); + + } +} + +G_BEGIN_DECLS + +/** + * SECTION: compute + * @section_id: compute + * @title: Computation on data + * @include: arrow-glib/arrow-glib.h + * + * #GArrowExecuteContext is a class to customize how to execute a + * function. + * + * #GArrowFunctionOptions is a base class for all function options + * classes such as #GArrowCastOptions. + * + * #GArrowFunctionDoc is a class for function document. + * + * #GArrowFunction is a class to process data. + * + * #GArrowExecuteNodeOptions is a base class for all execute node + * options classes such as #GArrowSourceNodeOptions. + * + * #GArrowSourceNodeOptions is a class to customize a source node. + * + * #GArrowAggregation is a class to specify how to aggregate. + * + * #GArrowAggregateNodeOptions is a class to customize an aggregate node. + * + * #GArrowSinkNodeOptions is a class to customize a sink node. 
+ * + * #GArrowExecuteNode is a class to execute an operation. + * + * #GArrowExecutePlan is a class to execute operations. + * + * #GArrowCastOptions is a class to customize the `cast` function and + * garrow_array_cast(). + * + * #GArrowScalarAggregateOptions is a class to customize the scalar + * aggregate functions such as `count` function and convenient + * functions of them such as garrow_array_count(). + * + * #GArrowCountOptions is a class to customize the `count` function and + * garrow_array_count() family. + * + * #GArrowFilterOptions is a class to customize the `filter` function and + * garrow_array_filter() family. + * + * #GArrowTakeOptions is a class to customize the `take` function and + * garrow_array_take() family. + * + * #GArrowArraySortOptions is a class to customize the + * `array_sort_indices` function. + * + * #GArrowSortOptions is a class to customize the `sort_indices` + * function. + * + * #GArrowSetLookupOptions is a class to customize the `is_in` function + * and `index_in` function. + * + * #GArrowVarianceOptions is a class to customize the `stddev` function + * and `variance` function. + * + * There are many functions to compute data on an array. 
+ */
+
+/* Instance-private data of #GArrowExecuteContext: the wrapped Arrow
+ * execution context is stored by value. */
+typedef struct GArrowExecuteContextPrivate_ {
+  arrow::compute::ExecContext context;
+} GArrowExecuteContextPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowExecuteContext,
+                           garrow_execute_context,
+                           G_TYPE_OBJECT)
+
+#define GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object)      \
+  static_cast<GArrowExecuteContextPrivate *>(           \
+    garrow_execute_context_get_instance_private(        \
+      GARROW_EXECUTE_CONTEXT(object)))
+
+/* GObject finalize: runs the ExecContext destructor explicitly, matching
+ * the placement new in garrow_execute_context_init(), then chains up. */
+static void
+garrow_execute_context_finalize(GObject *object)
+{
+  auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object);
+  priv->context.~ExecContext();
+  G_OBJECT_CLASS(garrow_execute_context_parent_class)->finalize(object);
+}
+
+/* Instance init: constructs the ExecContext in place inside the private
+ * struct, using the default memory pool and a null second argument. */
+static void
+garrow_execute_context_init(GArrowExecuteContext *object)
+{
+  auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object);
+  new(&priv->context) arrow::compute::ExecContext(arrow::default_memory_pool(),
+                                                  nullptr);
+}
+
+/* Class init: installs the finalize handler. */
+static void
+garrow_execute_context_class_init(GArrowExecuteContextClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = garrow_execute_context_finalize;
+}
+
+/**
+ * garrow_execute_context_new:
+ *
+ * Returns: A newly created #GArrowExecuteContext.
+ *
+ * Since: 1.0.0
+ */
+GArrowExecuteContext *
+garrow_execute_context_new(void)
+{
+  auto execute_context = g_object_new(GARROW_TYPE_EXECUTE_CONTEXT, NULL);
+  return GARROW_EXECUTE_CONTEXT(execute_context);
+}
+
+
+/* Instance-private data of #GArrowFunctionOptions: a raw pointer to the
+ * Arrow options object, deleted in finalize. */
+typedef struct GArrowFunctionOptionsPrivate_ {
+  arrow::compute::FunctionOptions *options;
+} GArrowFunctionOptionsPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowFunctionOptions,
+                           garrow_function_options,
+                           G_TYPE_OBJECT)
+
+#define GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object)     \
+  static_cast<GArrowFunctionOptionsPrivate *>(          \
+    garrow_function_options_get_instance_private(       \
+      GARROW_FUNCTION_OPTIONS(object)))
+
+/* GObject finalize: deletes the stored options object, then chains up. */
+static void
+garrow_function_options_finalize(GObject *object)
+{
+  auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  delete priv->options;
+  G_OBJECT_CLASS(garrow_function_options_parent_class)->finalize(object);
+}
+
+/* Instance init: nothing to set up here; priv->options is not assigned
+ * in this function. */
+static void
+garrow_function_options_init(GArrowFunctionOptions *object)
+{
+}
+
+/* Class init: installs the finalize handler. */
+static void
+garrow_function_options_class_init(GArrowFunctionOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = garrow_function_options_finalize;
+}
+
+
+/* Instance-private data of #GArrowFunctionDoc: a raw pointer to the
+ * Arrow function documentation. */
+typedef struct GArrowFunctionDocPrivate_ {
+  arrow::compute::FunctionDoc *doc;
+} GArrowFunctionDocPrivate;
+
+enum {
+  PROP_DOC = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowFunctionDoc,
+                           garrow_function_doc,
+                           G_TYPE_OBJECT)
+
+#define GARROW_FUNCTION_DOC_GET_PRIVATE(object) \
+  static_cast<GArrowFunctionDocPrivate *>(      \
+    garrow_function_doc_get_instance_private(   \
+      GARROW_FUNCTION_DOC(object)))
+
+/* GObject set_property: stores the pointer given for PROP_DOC as-is.
+ * NOTE(review): no finalize handler for this pointer is visible in this
+ * part of the file, so the wrapper presumably does not own it -- confirm
+ * who keeps the FunctionDoc alive. */
+static void
+garrow_function_doc_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GARROW_FUNCTION_DOC_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DOC:
+    priv->doc =
+      static_cast<arrow::compute::FunctionDoc *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_function_doc_init(GArrowFunctionDoc *object)
+{
+}
+
+static void
+garrow_function_doc_class_init(GArrowFunctionDocClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->set_property = garrow_function_doc_set_property;
+
+  /* "doc" is a write-only, construct-only pointer property. */
+  const auto flags = static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                              G_PARAM_CONSTRUCT_ONLY);
+  auto spec = g_param_spec_pointer("doc",
+                                   "Doc",
+                                   "The raw arrow::compute::FunctionDoc *",
+                                   flags);
+  g_object_class_install_property(gobject_class, PROP_DOC, spec);
+}
+
+/**
+ * garrow_function_doc_get_summary:
+ * @doc: A #GArrowFunctionDoc.
+ *
+ * Returns: A one-line summary of the function, using a verb.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 6.0.0
+ */
+gchar *
+garrow_function_doc_get_summary(GArrowFunctionDoc *doc)
+{
+  /* Copy exactly size() bytes of the summary into a new C string. */
+  const auto &arrow_summary = garrow_function_doc_get_raw(doc)->summary;
+  return g_strndup(arrow_summary.data(), arrow_summary.size());
+}
+
+/**
+ * garrow_function_doc_get_description:
+ * @doc: A #GArrowFunctionDoc.
+ *
+ * Returns: A detailed description of the function, meant to follow
+ *   the summary.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 6.0.0
+ */
+gchar *
+garrow_function_doc_get_description(GArrowFunctionDoc *doc)
+{
+  /* Copy exactly size() bytes of the description into a new C string. */
+  const auto &arrow_description =
+    garrow_function_doc_get_raw(doc)->description;
+  return g_strndup(arrow_description.data(), arrow_description.size());
+}
+
+/**
+ * garrow_function_doc_get_arg_names:
+ * @doc: A #GArrowFunctionDoc.
+ *
+ * Returns: (array zero-terminated=1) (element-type utf8) (transfer full):
+ *   Symbolic names (identifiers) for the function arguments.
+ *
+ *   It's a %NULL-terminated string array. It must be freed with
+ *   g_strfreev() when no longer needed.
+ * + * Since: 6.0.0 + */ +gchar ** +garrow_function_doc_get_arg_names(GArrowFunctionDoc *doc) +{ + auto arrow_doc = garrow_function_doc_get_raw(doc); + const auto &arrow_arg_names = arrow_doc->arg_names; + auto n = arrow_arg_names.size(); + auto arg_names = g_new(gchar *, n + 1); + for (size_t i = 0; i < n; ++i) { + arg_names[i] = g_strndup(arrow_arg_names[i].data(), + arrow_arg_names[i].size()); + } + arg_names[n] = NULL; + return arg_names; +} + +/** + * garrow_function_doc_get_options_class_name: + * @doc: A #GArrowFunctionDoc. + * + * Returns: Name of the options class, if any. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 6.0.0 + */ +gchar * +garrow_function_doc_get_options_class_name(GArrowFunctionDoc *doc) +{ + auto arrow_doc = garrow_function_doc_get_raw(doc); + return g_strndup(arrow_doc->options_class.data(), + arrow_doc->options_class.size()); +} + + +typedef struct GArrowFunctionPrivate_ { + std::shared_ptr<arrow::compute::Function> function; +} GArrowFunctionPrivate; + +enum { + PROP_FUNCTION = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFunction, + garrow_function, + G_TYPE_OBJECT) + +#define GARROW_FUNCTION_GET_PRIVATE(object) \ + static_cast<GArrowFunctionPrivate *>( \ + garrow_function_get_instance_private( \ + GARROW_FUNCTION(object))) + +static void +garrow_function_finalize(GObject *object) +{ + auto priv = GARROW_FUNCTION_GET_PRIVATE(object); + priv->function.~shared_ptr(); + G_OBJECT_CLASS(garrow_function_parent_class)->finalize(object); +} + +static void +garrow_function_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FUNCTION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FUNCTION: + priv->function = + *static_cast<std::shared_ptr<arrow::compute::Function> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void 
+garrow_function_init(GArrowFunction *object) +{ + auto priv = GARROW_FUNCTION_GET_PRIVATE(object); + new(&priv->function) std::shared_ptr<arrow::compute::Function>; +} + +static void +garrow_function_class_init(GArrowFunctionClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_function_finalize; + gobject_class->set_property = garrow_function_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("function", + "Function", + "The raw std::shared<arrow::compute::Function> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FUNCTION, spec); +} + +/** + * garrow_function_find: + * @name: A function name to be found. + * + * Returns: (transfer full): + * The found #GArrowFunction or %NULL on not found. + * + * Since: 1.0.0 + */ +GArrowFunction * +garrow_function_find(const gchar *name) +{ + auto arrow_function_registry = arrow::compute::GetFunctionRegistry(); + auto arrow_function_result = arrow_function_registry->GetFunction(name); + if (!arrow_function_result.ok()) { + return NULL; + } + auto arrow_function = *arrow_function_result; + return garrow_function_new_raw(&arrow_function); +} + +/** + * garrow_function_execute: + * @function: A #GArrowFunction. + * @args: (element-type GArrowDatum): A list of #GArrowDatum. + * @options: (nullable): Options for the execution as an object that + * implements #GArrowFunctionOptions. + * @context: (nullable): A #GArrowExecuteContext for the execution. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A return value of the execution as #GArrowDatum on success, %NULL on error. 
+ *
+ * Since: 1.0.0
+ */
+GArrowDatum *
+garrow_function_execute(GArrowFunction *function,
+                        GList *args,
+                        GArrowFunctionOptions *options,
+                        GArrowExecuteContext *context,
+                        GError **error)
+{
+  auto arrow_function = garrow_function_get_raw(function);
+
+  /* Unwrap every #GArrowDatum of the list into an arrow::Datum. */
+  std::vector<arrow::Datum> arrow_args;
+  for (auto arg = args; arg; arg = arg->next) {
+    arrow_args.push_back(garrow_datum_get_raw(GARROW_DATUM(arg->data)));
+  }
+
+  /* Without explicit options, fall back to the function's defaults. */
+  const arrow::compute::FunctionOptions *arrow_options =
+    options ?
+    garrow_function_options_get_raw(options) :
+    arrow_function->default_options();
+
+  arrow::Result<arrow::Datum> arrow_result_result;
+  if (context) {
+    arrow_result_result =
+      arrow_function->Execute(arrow_args,
+                              arrow_options,
+                              garrow_execute_context_get_raw(context));
+  } else {
+    /* No context given: execute with a temporary default one. */
+    arrow::compute::ExecContext arrow_default_context;
+    arrow_result_result = arrow_function->Execute(arrow_args,
+                                                  arrow_options,
+                                                  &arrow_default_context);
+  }
+
+  if (!garrow::check(error, arrow_result_result, "[function][execute]")) {
+    return NULL;
+  }
+  auto arrow_result = *arrow_result_result;
+  return garrow_datum_new_raw(&arrow_result);
+}
+
+/**
+ * garrow_function_get_doc:
+ * @function: A #GArrowFunction.
+ *
+ * Returns: (transfer full): The function documentation.
+ * + * Since: 6.0.0 + */ +GArrowFunctionDoc * +garrow_function_get_doc(GArrowFunction *function) +{ + auto arrow_function = garrow_function_get_raw(function); + const auto &arrow_doc = arrow_function->doc(); + return garrow_function_doc_new_raw(&arrow_doc); +} + + +typedef struct GArrowExecuteNodeOptionsPrivate_ { + arrow::compute::ExecNodeOptions *options; +} GArrowExecuteNodeOptionsPrivate; + +enum { + PROP_EXECUTE_NODE_OPTIONS = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExecuteNodeOptions, + garrow_execute_node_options, + G_TYPE_OBJECT) + +#define GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowExecuteNodeOptionsPrivate *>( \ + garrow_execute_node_options_get_instance_private( \ + GARROW_EXECUTE_NODE_OPTIONS(object))) + +static void +garrow_execute_node_options_finalize(GObject *object) +{ + auto priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(object); + delete priv->options; + G_OBJECT_CLASS(garrow_execute_node_options_parent_class)->finalize(object); +} + +static void +garrow_execute_node_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FUNCTION: + priv->options = + static_cast<arrow::compute::ExecNodeOptions *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_execute_node_options_init(GArrowExecuteNodeOptions *object) +{ + auto priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(object); + priv->options = nullptr; +} + +static void +garrow_execute_node_options_class_init(GArrowExecuteNodeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_execute_node_options_finalize; + gobject_class->set_property = garrow_execute_node_options_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("options", + "Options", + "The raw 
arrow::compute::ExecNodeOptions *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_EXECUTE_NODE_OPTIONS, + spec); +} + + +typedef struct GArrowSourceNodeOptionsPrivate_ { + GArrowRecordBatchReader *reader; + GArrowRecordBatch *record_batch; +} GArrowSourceNodeOptionsPrivate; + +enum { + PROP_READER = 1, + PROP_RECORD_BATCH, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSourceNodeOptions, + garrow_source_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +#define GARROW_SOURCE_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowSourceNodeOptionsPrivate *>( \ + garrow_source_node_options_get_instance_private( \ + GARROW_SOURCE_NODE_OPTIONS(object))) + +static void +garrow_source_node_options_dispose(GObject *object) +{ + auto priv = GARROW_SOURCE_NODE_OPTIONS_GET_PRIVATE(object); + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = nullptr; + } + + if (priv->record_batch) { + g_object_unref(priv->record_batch); + priv->record_batch = nullptr; + } + + G_OBJECT_CLASS(garrow_source_node_options_parent_class)->dispose(object); +} + +static void +garrow_source_node_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SOURCE_NODE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READER: + priv->reader = GARROW_RECORD_BATCH_READER(g_value_dup_object(value)); + break; + case PROP_RECORD_BATCH: + priv->record_batch = GARROW_RECORD_BATCH(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_source_node_options_init(GArrowSourceNodeOptions *object) +{ +} + +static void +garrow_source_node_options_class_init(GArrowSourceNodeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_source_node_options_dispose; + gobject_class->set_property = 
garrow_source_node_options_set_property; + + GParamSpec *spec; + spec = g_param_spec_object("reader", + "Reader", + "The GArrowRecordBatchReader that produces " + "record batches", + GARROW_TYPE_RECORD_BATCH_READER, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_READER, spec); + + spec = g_param_spec_object("record-batch", + "Record batch", + "The GArrowRecordBatch to be produced", + GARROW_TYPE_RECORD_BATCH, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH, spec); +} + +/** + * garrow_source_node_options_new_record_batch_reader: + * @reader: A #GArrowRecordBatchReader. + * + * Returns: A newly created #GArrowSourceNodeOptions. + * + * Since: 6.0.0 + */ +GArrowSourceNodeOptions * +garrow_source_node_options_new_record_batch_reader( + GArrowRecordBatchReader *reader) +{ + auto arrow_reader = garrow_record_batch_reader_get_raw(reader); + auto arrow_options = new arrow::compute::SourceNodeOptions( + arrow_reader->schema(), + [arrow_reader]() { + using ExecBatch = arrow::compute::ExecBatch; + using ExecBatchOptional = arrow::util::optional<ExecBatch>; + auto arrow_record_batch_result = arrow_reader->Next(); + if (!arrow_record_batch_result.ok()) { + return arrow::AsyncGeneratorEnd<ExecBatchOptional>(); + } + auto arrow_record_batch = std::move(*arrow_record_batch_result); + if (!arrow_record_batch) { + return arrow::AsyncGeneratorEnd<ExecBatchOptional>(); + } + return arrow::Future<ExecBatchOptional>::MakeFinished( + ExecBatch(*arrow_record_batch)); + }); + auto options = g_object_new(GARROW_TYPE_SOURCE_NODE_OPTIONS, + "options", arrow_options, + "reader", reader, + NULL); + return GARROW_SOURCE_NODE_OPTIONS(options); +} + +/** + * garrow_source_node_options_new_record_batch: + * @record_batch: A #GArrowRecordBatch. + * + * Returns: A newly created #GArrowSourceNodeOptions. 
+ *
+ * Since: 6.0.0
+ */
+GArrowSourceNodeOptions *
+garrow_source_node_options_new_record_batch(GArrowRecordBatch *record_batch)
+{
+  /* Shared between copies of the generator below: the batch to emit and
+   * whether it has been emitted already. */
+  struct OneShotState {
+    std::shared_ptr<arrow::RecordBatch> record_batch;
+    bool generated;
+  };
+  auto one_shot = std::make_shared<OneShotState>();
+  one_shot->generated = false;
+  one_shot->record_batch = garrow_record_batch_get_raw(record_batch);
+
+  auto arrow_options = new arrow::compute::SourceNodeOptions(
+    one_shot->record_batch->schema(),
+    [one_shot]() {
+      using ExecBatch = arrow::compute::ExecBatch;
+      using ExecBatchOptional = arrow::util::optional<ExecBatch>;
+      if (one_shot->generated) {
+        /* The single batch was already produced: end the stream. */
+        return arrow::AsyncGeneratorEnd<ExecBatchOptional>();
+      }
+      one_shot->generated = true;
+      return arrow::Future<ExecBatchOptional>::MakeFinished(
+        ExecBatch(*(one_shot->record_batch)));
+    });
+  auto options = g_object_new(GARROW_TYPE_SOURCE_NODE_OPTIONS,
+                              "options", arrow_options,
+                              "record-batch", record_batch,
+                              NULL);
+  return GARROW_SOURCE_NODE_OPTIONS(options);
+}
+
+/**
+ * garrow_source_node_options_new_table:
+ * @table: A #GArrowTable.
+ *
+ * Returns: A newly created #GArrowSourceNodeOptions.
+ * + * Since: 6.0.0 + */ +GArrowSourceNodeOptions * +garrow_source_node_options_new_table(GArrowTable *table) +{ + auto reader = garrow_table_batch_reader_new(table); + auto options = garrow_source_node_options_new_record_batch_reader( + GARROW_RECORD_BATCH_READER(reader)); + g_object_unref(reader); + return options; +} + + +typedef struct GArrowAggregationPrivate_ { + gchar *function; + GArrowFunctionOptions *options; + gchar *input; + gchar *output; +} GArrowAggregationPrivate; + +enum { + PROP_AGGREGATION_FUNCTION = 1, + PROP_AGGREGATION_OPTIONS, + PROP_AGGREGATION_INPUT, + PROP_AGGREGATION_OUTPUT, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowAggregation, + garrow_aggregation, + G_TYPE_OBJECT) + +#define GARROW_AGGREGATION_GET_PRIVATE(object) \ + static_cast<GArrowAggregationPrivate *>( \ + garrow_aggregation_get_instance_private( \ + GARROW_AGGREGATION(object))) + +static void +garrow_aggregation_dispose(GObject *object) +{ + auto priv = GARROW_AGGREGATION_GET_PRIVATE(object); + if (priv->options) { + g_object_unref(priv->options); + priv->options = nullptr; + } + G_OBJECT_CLASS(garrow_aggregation_parent_class)->dispose(object); +} + +static void +garrow_aggregation_finalize(GObject *object) +{ + auto priv = GARROW_AGGREGATION_GET_PRIVATE(object); + g_free(priv->function); + g_free(priv->input); + g_free(priv->output); + G_OBJECT_CLASS(garrow_aggregation_parent_class)->finalize(object); +} + +static void +garrow_aggregation_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_AGGREGATION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_AGGREGATION_FUNCTION: + priv->function = g_value_dup_string(value); + break; + case PROP_AGGREGATION_OPTIONS: + priv->options = GARROW_FUNCTION_OPTIONS(g_value_dup_object(value)); + break; + case PROP_AGGREGATION_INPUT: + priv->input = g_value_dup_string(value); + break; + case PROP_AGGREGATION_OUTPUT: + priv->output = g_value_dup_string(value); + break; + default: 
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_aggregation_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_AGGREGATION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_AGGREGATION_FUNCTION: + g_value_set_string(value, priv->function); + break; + case PROP_AGGREGATION_OPTIONS: + g_value_set_object(value, priv->options); + break; + case PROP_AGGREGATION_INPUT: + g_value_set_string(value, priv->input); + break; + case PROP_AGGREGATION_OUTPUT: + g_value_set_string(value, priv->output); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_aggregation_init(GArrowAggregation *object) +{ +} + +static void +garrow_aggregation_class_init(GArrowAggregationClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_aggregation_dispose; + gobject_class->finalize = garrow_aggregation_finalize; + gobject_class->set_property = garrow_aggregation_set_property; + gobject_class->get_property = garrow_aggregation_get_property; + + GParamSpec *spec; + /** + * GArrowAggregation:function: + * + * The function name to aggregate. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("function", + "Function", + "The function name to aggregate", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_AGGREGATION_FUNCTION, + spec); + + /** + * GArrowAggregation:options: + * + * The options of aggregate function. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("options", + "Options", + "The options of aggregate function", + GARROW_TYPE_FUNCTION_OPTIONS, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_AGGREGATION_OPTIONS, + spec); + + /** + * GArrowAggregation:input: + * + * The input field name of aggregate function. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("input", + "Input", + "The input field name of aggregate function", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_AGGREGATION_INPUT, + spec); + + /** + * GArrowAggregation:output: + * + * The output field name of aggregate function. + * + * Since: 6.0.0 + */ + spec = g_param_spec_string("output", + "Output", + "The output field name of aggregate function", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_AGGREGATION_OUTPUT, + spec); +} + +/** + * garrow_aggregation_new: + * @function: A name of aggregate function. + * @options: (nullable): A #GArrowFunctionOptions of aggregate function. + * @input: An input field name of aggregate function. + * @output: An output field name of aggregate function. + * + * Returns: A newly created #GArrowAggregation. 
+ * + * Since: 6.0.0 + */ +GArrowAggregation * +garrow_aggregation_new(const gchar *function, + GArrowFunctionOptions *options, + const gchar *input, + const gchar *output) +{ + return GARROW_AGGREGATION(g_object_new(GARROW_TYPE_AGGREGATION, + "function", function, + "options", options, + "input", input, + "output", output, + NULL)); +} + + +G_DEFINE_TYPE(GArrowAggregateNodeOptions, + garrow_aggregate_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +static void +garrow_aggregate_node_options_init(GArrowAggregateNodeOptions *object) +{ +} + +static void +garrow_aggregate_node_options_class_init(GArrowAggregateNodeOptionsClass *klass) +{ +} + +/** + * garrow_aggregate_node_options_new: + * @aggregations: (element-type GArrowAggregation): A list of #GArrowAggregation. + * @keys: (nullable) (array length=n_keys): Group keys. + * @n_keys: The number of @keys. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowAggregateNodeOptions. + * + * Since: 6.0.0 + */ +GArrowAggregateNodeOptions * +garrow_aggregate_node_options_new(GList *aggregations, + const gchar **keys, + gsize n_keys, + GError **error) +{ + std::vector<arrow::compute::internal::Aggregate> arrow_aggregates; + std::vector<arrow::FieldRef> arrow_targets; + std::vector<std::string> arrow_names; + std::vector<arrow::FieldRef> arrow_keys; + for (auto node = aggregations; node; node = node->next) { + auto aggregation_priv = GARROW_AGGREGATION_GET_PRIVATE(node->data); + arrow::compute::FunctionOptions *function_options = nullptr; + if (aggregation_priv->options) { + function_options = + garrow_function_options_get_raw(aggregation_priv->options); + }; + arrow_aggregates.push_back({aggregation_priv->function, function_options}); + if (!garrow_field_refs_add(arrow_targets, + aggregation_priv->input, + error, + "[aggregate-node-options][new][input]")) { + return NULL; + } + arrow_names.emplace_back(aggregation_priv->output); + } + for (gsize i = 0; i < n_keys; ++i) 
{ + if (!garrow_field_refs_add(arrow_keys, + keys[i], + error, + "[aggregate-node-options][new][key]")) { + return NULL; + } + } + auto arrow_options = + new arrow::compute::AggregateNodeOptions(std::move(arrow_aggregates), + std::move(arrow_targets), + std::move(arrow_names), + std::move(arrow_keys)); + auto options = g_object_new(GARROW_TYPE_AGGREGATE_NODE_OPTIONS, + "options", arrow_options, + NULL); + return GARROW_AGGREGATE_NODE_OPTIONS(options); +} + + +typedef struct GArrowSinkNodeOptionsPrivate_ { + arrow::AsyncGenerator<arrow::util::optional<arrow::compute::ExecBatch>> generator; + GArrowRecordBatchReader *reader; +} GArrowSinkNodeOptionsPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSinkNodeOptions, + garrow_sink_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +#define GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowSinkNodeOptionsPrivate *>( \ + garrow_sink_node_options_get_instance_private( \ + GARROW_SINK_NODE_OPTIONS(object))) + +static void +garrow_sink_node_options_dispose(GObject *object) +{ + auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(object); + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = nullptr; + } + G_OBJECT_CLASS(garrow_sink_node_options_parent_class)->dispose(object); +} + +static void +garrow_sink_node_options_finalize(GObject *object) +{ + auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(object); + priv->generator.~function(); + G_OBJECT_CLASS(garrow_sink_node_options_parent_class)->finalize(object); +} + +static void +garrow_sink_node_options_init(GArrowSinkNodeOptions *object) +{ + auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(object); + new(&(priv->generator)) + arrow::AsyncGenerator<arrow::util::optional<arrow::compute::ExecBatch>>(); +} + +static void +garrow_sink_node_options_class_init(GArrowSinkNodeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_sink_node_options_dispose; + gobject_class->finalize = 
garrow_sink_node_options_finalize; +} + +/** + * garrow_sink_node_options_new: + * + * Returns: A newly created #GArrowSinkNodeOptions. + * + * Since: 6.0.0 + */ +GArrowSinkNodeOptions * +garrow_sink_node_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_SINK_NODE_OPTIONS, NULL); + auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(options); + auto arrow_options = new arrow::compute::SinkNodeOptions(&(priv->generator)); + auto execute_node_options_priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(options); + execute_node_options_priv->options = arrow_options; + return GARROW_SINK_NODE_OPTIONS(options); +} + +/** + * garrow_sink_node_options_get_reader: + * @options: A #GArrowSinkNodeOptions. + * @schema: A #GArrowSchema. + * + * Returns: (transfer full): A #GArrowRecordBatchReader to read generated record batches. + * + * Since: 6.0.0 + */ +GArrowRecordBatchReader * +garrow_sink_node_options_get_reader(GArrowSinkNodeOptions *options, + GArrowSchema *schema) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(options); + if (!priv->reader) { + auto arrow_reader = + arrow::compute::MakeGeneratorReader(arrow_schema, + std::move(priv->generator), + arrow::default_memory_pool()); + priv->reader = garrow_record_batch_reader_new_raw(&arrow_reader); + } + g_object_ref(priv->reader); + return priv->reader; +} + + +typedef struct GArrowExecuteNodePrivate_ { + arrow::compute::ExecNode *node; +} GArrowExecuteNodePrivate; + +enum { + PROP_NODE = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExecuteNode, + garrow_execute_node, + G_TYPE_OBJECT) + +#define GARROW_EXECUTE_NODE_GET_PRIVATE(object) \ + static_cast<GArrowExecuteNodePrivate *>( \ + garrow_execute_node_get_instance_private( \ + GARROW_EXECUTE_NODE(object))) + +static void +garrow_execute_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXECUTE_NODE_GET_PRIVATE(object); + + switch (prop_id) { 
+ case PROP_NODE: + priv->node = + static_cast<arrow::compute::ExecNode *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_execute_node_init(GArrowExecuteNode *object) +{ +} + +static void +garrow_execute_node_class_init(GArrowExecuteNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->set_property = garrow_execute_node_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("node", + "Node", + "The raw arrow::compute::ExecNode *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_NODE, spec); +} + +/** + * garrow_execute_node_get_kind_name: + * @node: A #GArrowExecuteNode. + * + * Returns: The kind name of the node. + * + * Since: 6.0.0 + */ +const gchar * +garrow_execute_node_get_kind_name(GArrowExecuteNode *node) +{ + auto arrow_node = garrow_execute_node_get_raw(node); + return arrow_node->kind_name(); +} + +/** + * garrow_execute_node_get_output_schema: + * @node: A #GArrowExecuteNode. + * + * Returns: (transfer full): The output schema of the node. 
+ * + * Since: 6.0.0 + */ +GArrowSchema * +garrow_execute_node_get_output_schema(GArrowExecuteNode *node) +{ + auto arrow_node = garrow_execute_node_get_raw(node); + std::shared_ptr<arrow::Schema> arrow_schema = arrow_node->output_schema(); + return garrow_schema_new_raw(&arrow_schema); +} + + +typedef struct GArrowExecutePlanPrivate_ { + std::shared_ptr<arrow::compute::ExecPlan> plan; +} GArrowExecutePlanPrivate; + +enum { + PROP_PLAN = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowExecutePlan, + garrow_execute_plan, + G_TYPE_OBJECT) + +#define GARROW_EXECUTE_PLAN_GET_PRIVATE(object) \ + static_cast<GArrowExecutePlanPrivate *>( \ + garrow_execute_plan_get_instance_private( \ + GARROW_EXECUTE_PLAN(object))) + +static void +garrow_execute_plan_finalize(GObject *object) +{ + auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(object); + priv->plan.~shared_ptr(); + G_OBJECT_CLASS(garrow_execute_plan_parent_class)->finalize(object); +} + +static void +garrow_execute_plan_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_PLAN: + priv->plan = + *static_cast<std::shared_ptr<arrow::compute::ExecPlan> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_execute_plan_init(GArrowExecutePlan *object) +{ + auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(object); + new(&(priv->plan)) std::shared_ptr<arrow::compute::ExecPlan>; +} + +static void +garrow_execute_plan_class_init(GArrowExecutePlanClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_execute_plan_finalize; + gobject_class->set_property = garrow_execute_plan_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("plan", + "Plan", + "The raw std::shared_ptr<arrow::compute::ExecPlan>", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + 
G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_PLAN, spec); +} + +/** + * garrow_execute_plan_new: + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowExecutePlan on success, + * %NULL on error. + * + * Since: 6.0.0 + */ +GArrowExecutePlan * +garrow_execute_plan_new(GError **error) +{ + auto arrow_plan_result = arrow::compute::ExecPlan::Make(); + if (garrow::check(error, arrow_plan_result, "[execute-plan][new]")) { + return GARROW_EXECUTE_PLAN(g_object_new(GARROW_TYPE_EXECUTE_PLAN, + "plan", &(*arrow_plan_result), + NULL)); + } else { + return NULL; + } +} + +/** + * garrow_execute_plan_build_node: + * @plan: A #GArrowExecutePlan. + * @factory_name: A factory name to build a #GArrowExecuteNode. + * @inputs: (element-type GArrowExecuteNode): An inputs to execute new node. + * @options: A #GArrowExecuteNodeOptions for new node. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): A newly built and added #GArrowExecuteNode + * on success, %NULL on error. 
+ *
+ * Since: 6.0.0
+ */
+GArrowExecuteNode *
+garrow_execute_plan_build_node(GArrowExecutePlan *plan,
+                               const gchar *factory_name,
+                               GList *inputs,
+                               GArrowExecuteNodeOptions *options,
+                               GError **error)
+{
+  /* Unwrap the input nodes into raw arrow::compute::ExecNode pointers. */
+  std::vector<arrow::compute::ExecNode *> arrow_inputs;
+  for (auto input = inputs; input; input = input->next) {
+    arrow_inputs.push_back(
+      garrow_execute_node_get_raw(GARROW_EXECUTE_NODE(input->data)));
+  }
+
+  auto arrow_plan = garrow_execute_plan_get_raw(plan);
+  auto arrow_options = garrow_execute_node_options_get_raw(options);
+  auto arrow_node_result = arrow::compute::MakeExecNode(factory_name,
+                                                        arrow_plan.get(),
+                                                        arrow_inputs,
+                                                        *arrow_options);
+  if (!garrow::check(error, arrow_node_result, "[execute-plan][build-node]")) {
+    return NULL;
+  }
+  auto arrow_node = *arrow_node_result;
+  /* Label the new node with the name of the factory that built it. */
+  arrow_node->SetLabel(factory_name);
+  return garrow_execute_node_new_raw(arrow_node);
+}
+
+/**
+ * garrow_execute_plan_build_source_node:
+ * @plan: A #GArrowExecutePlan.
+ * @options: A #GArrowSourceNodeOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * This is a shortcut of garrow_execute_plan_build_node() for source
+ * node. A source node is built without any input node.
+ *
+ * Returns: (transfer full): A newly built and added #GArrowExecuteNode
+ *   for source on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GArrowExecuteNode *
+garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
+                                      GArrowSourceNodeOptions *options,
+                                      GError **error)
+{
+  auto node_options = GARROW_EXECUTE_NODE_OPTIONS(options);
+  return garrow_execute_plan_build_node(plan,
+                                        "source",
+                                        NULL,
+                                        node_options,
+                                        error);
+}
+
+/**
+ * garrow_execute_plan_build_aggregate_node:
+ * @plan: A #GArrowExecutePlan.
+ * @input: A #GArrowExecuteNode.
+ * @options: A #GArrowAggregateNodeOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * This is a shortcut of garrow_execute_plan_build_node() for aggregate
+ *   node.
+ * + * Returns: (transfer full): A newly built and added #GArrowExecuteNode + * for aggregation on success, %NULL on error. + * + * Since: 6.0.0 + */ +GArrowExecuteNode * +garrow_execute_plan_build_aggregate_node(GArrowExecutePlan *plan, + GArrowExecuteNode *input, + GArrowAggregateNodeOptions *options, + GError **error) +{ + GList *inputs = NULL; + inputs = g_list_prepend(inputs, input); + auto node = + garrow_execute_plan_build_node(plan, + "aggregate", + inputs, + GARROW_EXECUTE_NODE_OPTIONS(options), + error); + g_list_free(inputs); + return node; +} + +/** + * garrow_execute_plan_build_sink_node: + * @plan: A #GArrowExecutePlan. + * @input: A #GArrowExecuteNode. + * @options: A #GArrowSinkNodeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * This is a shortcut of garrow_execute_plan_build_node() for sink + * node. + * + * Returns: (transfer full): A newly built and added #GArrowExecuteNode + * for sink on success, %NULL on error. + * + * Since: 6.0.0 + */ +GArrowExecuteNode * +garrow_execute_plan_build_sink_node(GArrowExecutePlan *plan, + GArrowExecuteNode *input, + GArrowSinkNodeOptions *options, + GError **error) +{ + GList *inputs = NULL; + inputs = g_list_prepend(inputs, input); + auto node = + garrow_execute_plan_build_node(plan, + "sink", + inputs, + GARROW_EXECUTE_NODE_OPTIONS(options), + error); + g_list_free(inputs); + return node; +} + +/** + * garrow_execute_plan_validate: + * @plan: A #GArrowExecutePlan. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +garrow_execute_plan_validate(GArrowExecutePlan *plan, + GError **error) +{ + auto arrow_plan = garrow_execute_plan_get_raw(plan); + return garrow::check(error, + arrow_plan->Validate(), + "[execute-plan][validate]"); +} + +/** + * garrow_execute_plan_start: + * @plan: A #GArrowExecutePlan. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Starts this plan. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 6.0.0 + */ +gboolean +garrow_execute_plan_start(GArrowExecutePlan *plan, + GError **error) +{ + auto arrow_plan = garrow_execute_plan_get_raw(plan); + return garrow::check(error, + arrow_plan->StartProducing(), + "[execute-plan][start]"); +} + +/** + * garrow_execute_plan_stop: + * @plan: A #GArrowExecutePlan. + * + * Stops this plan. + * + * Since: 6.0.0 + */ +void +garrow_execute_plan_stop(GArrowExecutePlan *plan) +{ + auto arrow_plan = garrow_execute_plan_get_raw(plan); + arrow_plan->StopProducing(); +} + +/** + * garrow_execute_plan_wait: + * @plan: A #GArrowExecutePlan. + * + * Waits for finishing this plan. + * + * Since: 6.0.0 + */ +void +garrow_execute_plan_wait(GArrowExecutePlan *plan) +{ + auto arrow_plan = garrow_execute_plan_get_raw(plan); + arrow_plan->finished().Wait(); +} + + +typedef struct GArrowCastOptionsPrivate_ { + GArrowDataType *to_data_type; +} GArrowCastOptionsPrivate; + +enum { + PROP_TO_DATA_TYPE = 1, + PROP_ALLOW_INT_OVERFLOW, + PROP_ALLOW_TIME_TRUNCATE, + PROP_ALLOW_TIME_OVERFLOW, + PROP_ALLOW_DECIMAL_TRUNCATE, + PROP_ALLOW_FLOAT_TRUNCATE, + PROP_ALLOW_INVALID_UTF8, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions, + garrow_cast_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_CAST_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowCastOptionsPrivate *>( \ + garrow_cast_options_get_instance_private( \ + GARROW_CAST_OPTIONS(object))) + +static void +garrow_cast_options_dispose(GObject *object) +{ + auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object); + + if (priv->to_data_type) { + g_object_unref(priv->to_data_type); + priv->to_data_type = NULL; + } + + G_OBJECT_CLASS(garrow_cast_options_parent_class)->dispose(object); +} + +static void +garrow_cast_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object); + auto options = 
garrow_cast_options_get_raw(GARROW_CAST_OPTIONS(object)); + + switch (prop_id) { + case PROP_TO_DATA_TYPE: + { + auto to_data_type = g_value_dup_object(value); + if (priv->to_data_type) { + g_object_unref(priv->to_data_type); + } + if (to_data_type) { + priv->to_data_type = GARROW_DATA_TYPE(to_data_type); + options->to_type = garrow_data_type_get_raw(priv->to_data_type); + } else { + priv->to_data_type = NULL; + options->to_type = nullptr; + } + break; + } + case PROP_ALLOW_INT_OVERFLOW: + options->allow_int_overflow = g_value_get_boolean(value); + break; + case PROP_ALLOW_TIME_TRUNCATE: + options->allow_time_truncate = g_value_get_boolean(value); + break; + case PROP_ALLOW_TIME_OVERFLOW: + options->allow_time_overflow = g_value_get_boolean(value); + break; + case PROP_ALLOW_DECIMAL_TRUNCATE: + options->allow_decimal_truncate = g_value_get_boolean(value); + break; + case PROP_ALLOW_FLOAT_TRUNCATE: + options->allow_float_truncate = g_value_get_boolean(value); + break; + case PROP_ALLOW_INVALID_UTF8: + options->allow_invalid_utf8 = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cast_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object); + auto options = garrow_cast_options_get_raw(GARROW_CAST_OPTIONS(object)); + + switch (prop_id) { + case PROP_TO_DATA_TYPE: + g_value_set_object(value, priv->to_data_type); + break; + case PROP_ALLOW_INT_OVERFLOW: + g_value_set_boolean(value, options->allow_int_overflow); + break; + case PROP_ALLOW_TIME_TRUNCATE: + g_value_set_boolean(value, options->allow_time_truncate); + break; + case PROP_ALLOW_TIME_OVERFLOW: + g_value_set_boolean(value, options->allow_time_overflow); + break; + case PROP_ALLOW_DECIMAL_TRUNCATE: + g_value_set_boolean(value, options->allow_decimal_truncate); + break; + case PROP_ALLOW_FLOAT_TRUNCATE: + 
g_value_set_boolean(value, options->allow_float_truncate); + break; + case PROP_ALLOW_INVALID_UTF8: + g_value_set_boolean(value, options->allow_invalid_utf8); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cast_options_init(GArrowCastOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::CastOptions()); +} + +static void +garrow_cast_options_class_init(GArrowCastOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_cast_options_dispose; + gobject_class->set_property = garrow_cast_options_set_property; + gobject_class->get_property = garrow_cast_options_get_property; + + GParamSpec *spec; + + /** + * GArrowCastOptions:to-data-type: + * + * The #GArrowDataType being casted to. + * + * Since: 1.0.0 + */ + spec = g_param_spec_object("to-data-type", + "To data type", + "The GArrowDataType being casted to", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TO_DATA_TYPE, spec); + + /** + * GArrowCastOptions:allow-int-overflow: + * + * Whether integer overflow is allowed or not. + * + * Since: 0.7.0 + */ + spec = g_param_spec_boolean("allow-int-overflow", + "Allow int overflow", + "Whether integer overflow is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_INT_OVERFLOW, spec); + + /** + * GArrowCastOptions:allow-time-truncate: + * + * Whether truncating time value is allowed or not. 
+ * + * Since: 0.8.0 + */ + spec = g_param_spec_boolean("allow-time-truncate", + "Allow time truncate", + "Whether truncating time value is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_TIME_TRUNCATE, spec); + + /** + * GArrowCastOptions:allow-time-overflow: + * + * Whether time overflow is allowed or not. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("allow-time-overflow", + "Allow time overflow", + "Whether time overflow is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_TIME_OVERFLOW, spec); + + /** + * GArrowCastOptions:allow-decimal-truncate: + * + * Whether truncating decimal value is allowed or not. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("allow-decimal-truncate", + "Allow decimal truncate", + "Whether truncating decimal value is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_DECIMAL_TRUNCATE, spec); + + /** + * GArrowCastOptions:allow-float-truncate: + * + * Whether truncating float value is allowed or not. + * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("allow-float-truncate", + "Allow float truncate", + "Whether truncating float value is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_FLOAT_TRUNCATE, spec); + + /** + * GArrowCastOptions:allow-invalid-utf8: + * + * Whether invalid UTF-8 string value is allowed or not. 
+ * + * Since: 0.13.0 + */ + spec = g_param_spec_boolean("allow-invalid-utf8", + "Allow invalid UTF-8", + "Whether invalid UTF-8 string value is allowed or not", + FALSE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_INVALID_UTF8, spec); +} + +/** + * garrow_cast_options_new: + * + * Returns: A newly created #GArrowCastOptions. + * + * Since: 0.7.0 + */ +GArrowCastOptions * +garrow_cast_options_new(void) +{ + auto cast_options = g_object_new(GARROW_TYPE_CAST_OPTIONS, NULL); + return GARROW_CAST_OPTIONS(cast_options); +} + + +enum { + PROP_SKIP_NULLS = 1, + PROP_MIN_COUNT, +}; + +G_DEFINE_TYPE(GArrowScalarAggregateOptions, + garrow_scalar_aggregate_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_scalar_aggregate_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_scalar_aggregate_options_get_raw( + GARROW_SCALAR_AGGREGATE_OPTIONS(object)); + + switch (prop_id) { + case PROP_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + case PROP_MIN_COUNT: + options->min_count = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_aggregate_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_scalar_aggregate_options_get_raw( + GARROW_SCALAR_AGGREGATE_OPTIONS(object)); + + switch (prop_id) { + case PROP_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + case PROP_MIN_COUNT: + g_value_set_uint(value, options->min_count); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_aggregate_options_init(GArrowScalarAggregateOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = 
static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::ScalarAggregateOptions()); +} + +static void +garrow_scalar_aggregate_options_class_init( + GArrowScalarAggregateOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_scalar_aggregate_options_set_property; + gobject_class->get_property = garrow_scalar_aggregate_options_get_property; + + auto options = arrow::compute::ScalarAggregateOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowScalarAggregateOptions:skip-nulls: + * + * Whether NULLs are skipped or not. + * + * Since: 5.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SKIP_NULLS, spec); + + /** + * GArrowScalarAggregateOptions:min-count: + * + * The minimum required number of values. + * + * Since: 5.0.0 + */ + spec = g_param_spec_uint("min-count", + "Min count", + "The minimum required number of values", + 0, + G_MAXUINT, + options.min_count, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MIN_COUNT, spec); +} + +/** + * garrow_scalar_aggregate_options_new: + * + * Returns: A newly created #GArrowScalarAggregateOptions. 
+ * + * Since: 5.0.0 + */ +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new(void) +{ + auto scalar_aggregate_options = + g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS, NULL); + return GARROW_SCALAR_AGGREGATE_OPTIONS(scalar_aggregate_options); +} + + +enum { + PROP_MODE = 1, +}; + +G_DEFINE_TYPE(GArrowCountOptions, + garrow_count_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_count_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_count_options_get_raw(GARROW_COUNT_OPTIONS(object)); + + switch (prop_id) { + case PROP_MODE: + options->mode = + static_cast<arrow::compute::CountOptions::CountMode>( + g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_count_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_count_options_get_raw(GARROW_COUNT_OPTIONS(object)); + + switch (prop_id) { + case PROP_MODE: + g_value_set_enum(value, options->mode); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_count_options_init(GArrowCountOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::CountOptions()); +} + +static void +garrow_count_options_class_init(GArrowCountOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_count_options_set_property; + gobject_class->get_property = garrow_count_options_get_property; + + auto options = arrow::compute::CountOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowCountOptions:null-selection-behavior: + * + * How to handle counted values. 
+ * + * Since: 0.17.0 + */ + spec = g_param_spec_enum("mode", + "Count mode", + "Which values to count", + GARROW_TYPE_COUNT_MODE, + static_cast<GArrowCountMode>(options.mode), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MODE, spec); +} + +/** + * garrow_count_options_new: + * + * Returns: A newly created #GArrowCountOptions. + * + * Since: 6.0.0 + */ +GArrowCountOptions * +garrow_count_options_new(void) +{ + auto count_options = g_object_new(GARROW_TYPE_COUNT_OPTIONS, NULL); + return GARROW_COUNT_OPTIONS(count_options); +} + + +enum { + PROP_NULL_SELECTION_BEHAVIOR = 1, +}; + +G_DEFINE_TYPE(GArrowFilterOptions, + garrow_filter_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_filter_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_filter_options_get_raw(GARROW_FILTER_OPTIONS(object)); + + switch (prop_id) { + case PROP_NULL_SELECTION_BEHAVIOR: + options->null_selection_behavior = + static_cast<arrow::compute::FilterOptions::NullSelectionBehavior>( + g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_filter_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_filter_options_get_raw(GARROW_FILTER_OPTIONS(object)); + + switch (prop_id) { + case PROP_NULL_SELECTION_BEHAVIOR: + g_value_set_enum(value, options->null_selection_behavior); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_filter_options_init(GArrowFilterOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::FilterOptions()); +} + +static void +garrow_filter_options_class_init(GArrowFilterOptionsClass *klass) +{ + auto 
gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_filter_options_set_property; + gobject_class->get_property = garrow_filter_options_get_property; + + auto options = arrow::compute::FilterOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowFilterOptions:null-selection-behavior: + * + * How to handle filtered values. + * + * Since: 0.17.0 + */ + spec = g_param_spec_enum("null-selection-behavior", + "NULL selection behavior", + "How to handle filtered values", + GARROW_TYPE_FILTER_NULL_SELECTION_BEHAVIOR, + static_cast<GArrowFilterNullSelectionBehavior>( + options.null_selection_behavior), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_NULL_SELECTION_BEHAVIOR, + spec); +} + +/** + * garrow_filter_options_new: + * + * Returns: A newly created #GArrowFilterOptions. + * + * Since: 0.17.0 + */ +GArrowFilterOptions * +garrow_filter_options_new(void) +{ + auto filter_options = g_object_new(GARROW_TYPE_FILTER_OPTIONS, NULL); + return GARROW_FILTER_OPTIONS(filter_options); +} + + +G_DEFINE_TYPE(GArrowTakeOptions, + garrow_take_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_take_options_init(GArrowTakeOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::TakeOptions()); +} + +static void +garrow_take_options_class_init(GArrowTakeOptionsClass *klass) +{ +} + +/** + * garrow_take_options_new: + * + * Returns: A newly created #GArrowTakeOptions. 
+ * + * Since: 0.14.0 + */ +GArrowTakeOptions * +garrow_take_options_new(void) +{ + auto take_options = g_object_new(GARROW_TYPE_TAKE_OPTIONS, NULL); + return GARROW_TAKE_OPTIONS(take_options); +} + + +enum { + PROP_ARRAY_SORT_OPTIONS_ORDER = 1, +}; + +G_DEFINE_TYPE(GArrowArraySortOptions, + garrow_array_sort_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_array_sort_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_array_sort_options_get_raw(GARROW_ARRAY_SORT_OPTIONS(object)); + + switch (prop_id) { + case PROP_ARRAY_SORT_OPTIONS_ORDER: + options->order = + static_cast<arrow::compute::SortOrder>(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_sort_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_array_sort_options_get_raw(GARROW_ARRAY_SORT_OPTIONS(object)); + + switch (prop_id) { + case PROP_ARRAY_SORT_OPTIONS_ORDER: + g_value_set_enum(value, static_cast<GArrowSortOrder>(options->order)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_sort_options_init(GArrowArraySortOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::ArraySortOptions()); +} + +static void +garrow_array_sort_options_class_init(GArrowArraySortOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_array_sort_options_set_property; + gobject_class->get_property = garrow_array_sort_options_get_property; + + auto options = arrow::compute::ArraySortOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowArraySortOptions:order: + * + * How to order values. 
+ * + * Since: 3.0.0 + */ + spec = g_param_spec_enum("order", + "Order", + "How to order values", + GARROW_TYPE_SORT_ORDER, + static_cast<GArrowSortOrder>(options.order), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ARRAY_SORT_OPTIONS_ORDER, + spec); +} + +/** + * garrow_array_sort_options_new: + * @order: How to order by values. + * + * Returns: A newly created #GArrowArraySortOptions. + * + * Since: 3.0.0 + */ +GArrowArraySortOptions * +garrow_array_sort_options_new(GArrowSortOrder order) +{ + auto array_sort_options = + g_object_new(GARROW_TYPE_ARRAY_SORT_OPTIONS, + "order", order, + NULL); + return GARROW_ARRAY_SORT_OPTIONS(array_sort_options); +} + +/** + * garrow_array_sort_options_equal: + * @options: A #GArrowArraySortOptions. + * @other_options: A #GArrowArraySortOptions to be compared. + * + * Returns: %TRUE if both of them have the same order, %FALSE + * otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_array_sort_options_equal(GArrowArraySortOptions *options, + GArrowArraySortOptions *other_options) +{ + auto arrow_options = garrow_array_sort_options_get_raw(options); + auto arrow_other_options = garrow_array_sort_options_get_raw(other_options); + return arrow_options->order == arrow_other_options->order; +} + + +typedef struct GArrowSortKeyPrivate_ { + arrow::compute::SortKey sort_key; +} GArrowSortKeyPrivate; + +enum { + PROP_SORT_KEY_NAME = 1, + PROP_SORT_KEY_ORDER, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSortKey, + garrow_sort_key, + G_TYPE_OBJECT) + +#define GARROW_SORT_KEY_GET_PRIVATE(object) \ + static_cast<GArrowSortKeyPrivate *>( \ + garrow_sort_key_get_instance_private( \ + GARROW_SORT_KEY(object))) + +static void +garrow_sort_key_finalize(GObject *object) +{ + auto priv = GARROW_SORT_KEY_GET_PRIVATE(object); + priv->sort_key.~SortKey(); + G_OBJECT_CLASS(garrow_sort_key_parent_class)->finalize(object); +} + +static void +garrow_sort_key_set_property(GObject *object, + guint 
prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SORT_KEY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SORT_KEY_NAME: + priv->sort_key.name = g_value_get_string(value); + break; + case PROP_SORT_KEY_ORDER: + priv->sort_key.order = + static_cast<arrow::compute::SortOrder>(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_sort_key_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SORT_KEY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SORT_KEY_NAME: + g_value_set_string(value, priv->sort_key.name.c_str()); + break; + case PROP_SORT_KEY_ORDER: + g_value_set_enum(value, static_cast<GArrowSortOrder>(priv->sort_key.order)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_sort_key_init(GArrowSortKey *object) +{ + auto priv = GARROW_SORT_KEY_GET_PRIVATE(object); + new(&priv->sort_key) arrow::compute::SortKey(""); +} + +static void +garrow_sort_key_class_init(GArrowSortKeyClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_sort_key_finalize; + gobject_class->set_property = garrow_sort_key_set_property; + gobject_class->get_property = garrow_sort_key_get_property; + + GParamSpec *spec; + /** + * GArrowSortKey:name: + * + * The column name to be used. + * + * Since: 3.0.0 + */ + spec = g_param_spec_string("name", + "Name", + "The column name to be used", + NULL, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SORT_KEY_NAME, spec); + + /** + * GArrowSortKey:order: + * + * How to order values. 
+ * + * Since: 3.0.0 + */ + spec = g_param_spec_enum("order", + "Order", + "How to order values", + GARROW_TYPE_SORT_ORDER, + 0, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SORT_KEY_ORDER, spec); +} + +/** + * garrow_sort_key_new: + * @name: A column name to be used. + * @order: How to order by this sort key. + * + * Returns: A newly created #GArrowSortKey. + * + * Since: 3.0.0 + */ +GArrowSortKey * +garrow_sort_key_new(const gchar *name, GArrowSortOrder order) +{ + auto sort_key = g_object_new(GARROW_TYPE_SORT_KEY, + "name", name, + "order", order, + NULL); + return GARROW_SORT_KEY(sort_key); +} + +/** + * garrow_sort_key_equal: + * @sort_key: A #GArrowSortKey. + * @other_sort_key: A #GArrowSortKey to be compared. + * + * Returns: %TRUE if both of them have the same name and order, %FALSE + * otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_sort_key_equal(GArrowSortKey *sort_key, + GArrowSortKey *other_sort_key) +{ + auto arrow_sort_key = garrow_sort_key_get_raw(sort_key); + auto arrow_other_sort_key = garrow_sort_key_get_raw(other_sort_key); + return garrow_sort_key_equal_raw(*arrow_sort_key, + *arrow_other_sort_key); +} + + +G_DEFINE_TYPE(GArrowSortOptions, + garrow_sort_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_sort_options_init(GArrowSortOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::SortOptions()); +} + +static void +garrow_sort_options_class_init(GArrowSortOptionsClass *klass) +{ +} + +/** + * garrow_sort_options_new: + * @sort_keys: (nullable) (element-type GArrowSortKey): The sort keys to be used. + * + * Returns: A newly created #GArrowSortOptions. 
+ * + * Since: 3.0.0 + */ +GArrowSortOptions * +garrow_sort_options_new(GList *sort_keys) +{ + auto sort_options = + GARROW_SORT_OPTIONS(g_object_new(GARROW_TYPE_SORT_OPTIONS, NULL)); + if (sort_keys) { + garrow_sort_options_set_sort_keys(sort_options, sort_keys); + } + return sort_options; +} + +/** + * garrow_sort_options_equal: + * @options: A #GArrowSortOptions. + * @other_options: A #GArrowSortOptions to be compared. + * + * Returns: %TRUE if both of them have the same sort keys, %FALSE + * otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_sort_options_equal(GArrowSortOptions *options, + GArrowSortOptions *other_options) +{ + auto arrow_options = garrow_sort_options_get_raw(options); + auto arrow_other_options = garrow_sort_options_get_raw(other_options); + if (arrow_options->sort_keys.size() != + arrow_other_options->sort_keys.size()) { + return FALSE; + } + const auto n_sort_keys = arrow_options->sort_keys.size(); + for (size_t i = 0; i < n_sort_keys; ++i) { + if (!garrow_sort_key_equal_raw(arrow_options->sort_keys[i], + arrow_other_options->sort_keys[i])) { + return FALSE; + } + } + return TRUE; +} + +/** + * garrow_sort_options_get_sort_keys: + * @options: A #GArrowSortOptions. + * + * Returns: (transfer full) (element-type GArrowSortKey): + * The sort keys to be used. + * + * Since: 3.0.0 + */ +GList * +garrow_sort_options_get_sort_keys(GArrowSortOptions *options) +{ + auto arrow_options = garrow_sort_options_get_raw(options); + GList *sort_keys = NULL; + for (const auto &arrow_sort_key : arrow_options->sort_keys) { + auto sort_key = + garrow_sort_key_new(arrow_sort_key.name.c_str(), + static_cast<GArrowSortOrder>(arrow_sort_key.order)); + sort_keys = g_list_prepend(sort_keys, sort_key); + } + return g_list_reverse(sort_keys); +} + +/** + * garrow_sort_options_add_sort_key: + * @options: A #GArrowSortOptions. + * @sort_key: The sort key to be added. + * + * Add a sort key to be used. 
+ * + * Since: 3.0.0 + */ +void +garrow_sort_options_add_sort_key(GArrowSortOptions *options, + GArrowSortKey *sort_key) +{ + auto arrow_options = garrow_sort_options_get_raw(options); + auto arrow_sort_key = garrow_sort_key_get_raw(sort_key); + arrow_options->sort_keys.push_back(*arrow_sort_key); +} + +/** + * garrow_sort_options_set_sort_keys: + * @options: A #GArrowSortOptions. + * @sort_keys: (element-type GArrowSortKey): The sort keys to be used. + * + * Set sort keys to be used. + * + * Since: 3.0.0 + */ +void +garrow_sort_options_set_sort_keys(GArrowSortOptions *options, + GList *sort_keys) +{ + auto arrow_options = garrow_sort_options_get_raw(options); + arrow_options->sort_keys.clear(); + for (auto node = sort_keys; node; node = node->next) { + auto sort_key = GARROW_SORT_KEY(node->data); + auto arrow_sort_key = garrow_sort_key_get_raw(sort_key); + arrow_options->sort_keys.push_back(*arrow_sort_key); + } +} + + +typedef struct GArrowSetLookupOptionsPrivate_ { + GArrowDatum *value_set; +} GArrowSetLookupOptionsPrivate; + +enum { + PROP_SET_LOOKUP_OPTIONS_VALUE_SET = 1, + PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSetLookupOptions, + garrow_set_lookup_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowSetLookupOptionsPrivate *>( \ + garrow_set_lookup_options_get_instance_private( \ + GARROW_SET_LOOKUP_OPTIONS(object))) + +static void +garrow_set_lookup_options_dispose(GObject *object) +{ + auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object); + + if (priv->value_set) { + g_object_unref(priv->value_set); + priv->value_set = NULL; + } + + G_OBJECT_CLASS(garrow_set_lookup_options_parent_class)->dispose(object); +} + +static void +garrow_set_lookup_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object); + auto options = + 
garrow_set_lookup_options_get_raw(GARROW_SET_LOOKUP_OPTIONS(object)); + + switch (prop_id) { + case PROP_SET_LOOKUP_OPTIONS_VALUE_SET: + priv->value_set = GARROW_DATUM(g_value_dup_object(value)); + options->value_set = garrow_datum_get_raw(priv->value_set); + break; + case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_set_lookup_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object); + auto options = + garrow_set_lookup_options_get_raw(GARROW_SET_LOOKUP_OPTIONS(object)); + + switch (prop_id) { + case PROP_SET_LOOKUP_OPTIONS_VALUE_SET: + g_value_set_object(value, priv->value_set); + break; + case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_set_lookup_options_init(GArrowSetLookupOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::SetLookupOptions()); +} + +static void +garrow_set_lookup_options_class_init(GArrowSetLookupOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_set_lookup_options_dispose; + gobject_class->set_property = garrow_set_lookup_options_set_property; + gobject_class->get_property = garrow_set_lookup_options_get_property; + + + arrow::compute::SetLookupOptions options; + + GParamSpec *spec; + /** + * GArrowSetLookupOptions:value-set: + * + * The set of values to look up input values into. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_object("value-set", + "Value set", + "The set of values to look up input values into", + GARROW_TYPE_DATUM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_SET_LOOKUP_OPTIONS_VALUE_SET, + spec); + + /** + * GArrowSetLookupOptions:skip-nulls: + * + * Whether NULLs are skipped or not. + * + * Since: 6.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS, + spec); +} + +/** + * garrow_set_lookup_options_new: + * @value_set: A #GArrowArrayDatum or #GArrowChunkedArrayDatum to be looked up. + * + * Returns: A newly created #GArrowSetLookupOptions. + * + * Since: 6.0.0 + */ +GArrowSetLookupOptions * +garrow_set_lookup_options_new(GArrowDatum *value_set) +{ + return GARROW_SET_LOOKUP_OPTIONS( + g_object_new(GARROW_TYPE_SET_LOOKUP_OPTIONS, + "value-set", value_set, + NULL)); +} + + +enum { + PROP_VARIANCE_OPTIONS_DDOF = 1, + PROP_VARIANCE_OPTIONS_SKIP_NULLS, + PROP_VARIANCE_OPTIONS_MIN_COUNT, +}; + +G_DEFINE_TYPE(GArrowVarianceOptions, + garrow_variance_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_VARIANCE_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowVarianceOptionsPrivate *>( \ + garrow_variance_options_get_instance_private( \ + GARROW_VARIANCE_OPTIONS(object))) + +static void +garrow_variance_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_variance_options_get_raw(GARROW_VARIANCE_OPTIONS(object)); + + switch (prop_id) { + case PROP_VARIANCE_OPTIONS_DDOF: + options->ddof = g_value_get_int(value); + break; + case PROP_VARIANCE_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + case 
PROP_VARIANCE_OPTIONS_MIN_COUNT: + options->min_count = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_variance_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_variance_options_get_raw(GARROW_VARIANCE_OPTIONS(object)); + + switch (prop_id) { + case PROP_VARIANCE_OPTIONS_DDOF: + g_value_set_int(value, options->ddof); + break; + case PROP_VARIANCE_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + case PROP_VARIANCE_OPTIONS_MIN_COUNT: + g_value_set_uint(value, options->min_count); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_variance_options_init(GArrowVarianceOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast<arrow::compute::FunctionOptions *>( + new arrow::compute::VarianceOptions()); +} + +static void +garrow_variance_options_class_init(GArrowVarianceOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_variance_options_set_property; + gobject_class->get_property = garrow_variance_options_get_property; + + + arrow::compute::VarianceOptions options; + + GParamSpec *spec; + /** + * GArrowVarianceOptions:ddof: + * + * The Delta Degrees of Freedom (ddof) to be used. + * + * Since: 6.0.0 + */ + spec = g_param_spec_int("ddof", + "Delta Degrees of Freedom", + "The Delta Degrees of Freedom (ddof) to be used", + G_MININT, + G_MAXINT, + options.ddof, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_VARIANCE_OPTIONS_DDOF, + spec); + + /** + * GArrowVarianceOptions:skip-nulls: + * + * Whether NULLs are skipped or not. 
+ * + * Since: 6.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_VARIANCE_OPTIONS_SKIP_NULLS, + spec); + + /** + * GArrowVarianceOptions:min-count: + * + * If less than this many non-null values are observed, emit null. + * + * Since: 6.0.0 + */ + spec = g_param_spec_uint("min-count", + "Min count", + "If less than this many non-null values " + "are observed, emit null", + 0, + G_MAXUINT, + options.min_count, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_VARIANCE_OPTIONS_MIN_COUNT, + spec); + +} + +/** + * garrow_variance_options_new: + * + * Returns: A newly created #GArrowVarianceOptions. + * + * Since: 6.0.0 + */ +GArrowVarianceOptions * +garrow_variance_options_new(void) +{ + return GARROW_VARIANCE_OPTIONS( + g_object_new(GARROW_TYPE_VARIANCE_OPTIONS, NULL)); +} + + +/** + * garrow_array_cast: + * @array: A #GArrowArray. + * @target_data_type: A #GArrowDataType of cast target data. + * @options: (nullable): A #GArrowCastOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created casted array on success, %NULL on error. 
+ * + * Since: 0.7.0 + */ +GArrowArray * +garrow_array_cast(GArrowArray *array, + GArrowDataType *target_data_type, + GArrowCastOptions *options, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_array_raw = arrow_array.get(); + auto arrow_target_data_type = garrow_data_type_get_raw(target_data_type); + arrow::Result<std::shared_ptr<arrow::Array>> arrow_casted_array; + if (options) { + auto arrow_options = garrow_cast_options_get_raw(options); + arrow_casted_array = arrow::compute::Cast(*arrow_array_raw, + arrow_target_data_type, + *arrow_options); + } else { + arrow_casted_array = arrow::compute::Cast(*arrow_array_raw, + arrow_target_data_type); + } + if (garrow::check(error, + arrow_casted_array, + [&]() { + std::stringstream message; + message << "[array][cast] <"; + message << arrow_array->type()->ToString(); + message << "> -> <"; + message << arrow_target_data_type->ToString(); + message << ">"; + return message.str(); + })) { + return garrow_array_new_raw(&(*arrow_casted_array)); + } else { + return NULL; + } +} + +/** + * garrow_array_unique: + * @array: A #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created unique elements array on success, %NULL on error. + * + * Since: 0.8.0 + */ +GArrowArray * +garrow_array_unique(GArrowArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_unique_array = arrow::compute::Unique(arrow_array); + if (garrow::check(error, + arrow_unique_array, + [&]() { + std::stringstream message; + message << "[array][unique] <"; + message << arrow_array->type()->ToString(); + message << ">"; + return message.str(); + })) { + return garrow_array_new_raw(&(*arrow_unique_array)); + } else { + return NULL; + } +} + +/** + * garrow_array_dictionary_encode: + * @array: A #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable) (transfer full): + * A newly created #GArrowDictionaryArray for the @array on success, + * %NULL on error. + * + * Since: 0.8.0 + */ +GArrowDictionaryArray * +garrow_array_dictionary_encode(GArrowArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_dictionary_encoded_datum = + arrow::compute::DictionaryEncode(arrow_array); + if (garrow::check(error, + arrow_dictionary_encoded_datum, + [&]() { + std::stringstream message; + message << "[array][dictionary-encode] <"; + message << arrow_array->type()->ToString(); + message << ">"; + return message.str(); + })) { + auto arrow_dictionary_encoded_array = + (*arrow_dictionary_encoded_datum).make_array(); + auto dictionary_encoded_array = + garrow_array_new_raw(&arrow_dictionary_encoded_array); + return GARROW_DICTIONARY_ARRAY(dictionary_encoded_array); + } else { + return NULL; + } +} + +/** + * garrow_array_count: + * @array: A #GArrowArray. + * @options: (nullable): A #GArrowCountOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The number of target values on success. If an error is occurred, + * the returned value is untrustful value. 
+ * + * Since: 0.13.0 + */ +gint64 +garrow_array_count(GArrowArray *array, + GArrowCountOptions *options, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_array_raw = arrow_array.get(); + arrow::Result<arrow::Datum> arrow_counted_datum; + if (options) { + auto arrow_options = garrow_count_options_get_raw(options); + arrow_counted_datum = + arrow::compute::Count(*arrow_array_raw, *arrow_options); + } else { + arrow_counted_datum = arrow::compute::Count(*arrow_array_raw); + } + if (garrow::check(error, arrow_counted_datum, "[array][count]")) { + using ScalarType = typename arrow::TypeTraits<arrow::Int64Type>::ScalarType; + auto arrow_counted_scalar = + std::dynamic_pointer_cast<ScalarType>((*arrow_counted_datum).scalar()); + return arrow_counted_scalar->value; + } else { + return 0; + } +} + +/** + * garrow_array_count_values: + * @array: A #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A #GArrowStructArray of `{input type "values", int64_t "counts"}` + * on success, %NULL on error. + * + * Since: 0.13.0 + */ +GArrowStructArray * +garrow_array_count_values(GArrowArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_counted_values = arrow::compute::ValueCounts(arrow_array); + if (garrow::check(error, arrow_counted_values, "[array][count-values]")) { + std::shared_ptr<arrow::Array> arrow_counted_values_array = *arrow_counted_values; + return GARROW_STRUCT_ARRAY(garrow_array_new_raw(&arrow_counted_values_array)); + } else { + return NULL; + } +} + + +/** + * garrow_boolean_array_invert: + * @array: A #GArrowBooleanArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The element-wise inverted boolean array. + * + * It should be freed with g_object_unref() when no longer needed. 
+ * + * Since: 0.13.0 + */ +GArrowBooleanArray * +garrow_boolean_array_invert(GArrowBooleanArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_inverted_datum = arrow::compute::Invert(arrow_array); + if (garrow::check(error, arrow_inverted_datum, "[boolean-array][invert]")) { + auto arrow_inverted_array = (*arrow_inverted_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_inverted_array)); + } else { + return NULL; + } +} + +/** + * garrow_boolean_array_and: + * @left: A left hand side #GArrowBooleanArray. + * @right: A right hand side #GArrowBooleanArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The element-wise AND operated boolean array. + * + * It should be freed with g_object_unref() when no longer needed. + * + * Since: 0.13.0 + */ +GArrowBooleanArray * +garrow_boolean_array_and(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error) +{ + auto arrow_left = garrow_array_get_raw(GARROW_ARRAY(left)); + auto arrow_right = garrow_array_get_raw(GARROW_ARRAY(right)); + auto arrow_operated_datum = arrow::compute::And(arrow_left, arrow_right); + if (garrow::check(error, arrow_operated_datum, "[boolean-array][and]")) { + auto arrow_operated_array = (*arrow_operated_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_operated_array)); + } else { + return NULL; + } +} + +/** + * garrow_boolean_array_or: + * @left: A left hand side #GArrowBooleanArray. + * @right: A right hand side #GArrowBooleanArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The element-wise OR operated boolean array. + * + * It should be freed with g_object_unref() when no longer needed. 
+ * + * Since: 0.13.0 + */ +GArrowBooleanArray * +garrow_boolean_array_or(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error) +{ + auto arrow_left = garrow_array_get_raw(GARROW_ARRAY(left)); + auto arrow_right = garrow_array_get_raw(GARROW_ARRAY(right)); + auto arrow_operated_datum = arrow::compute::Or(arrow_left, arrow_right); + if (garrow::check(error, arrow_operated_datum, "[boolean-array][or]")) { + auto arrow_operated_array = (*arrow_operated_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_operated_array)); + } else { + return NULL; + } +} + +/** + * garrow_boolean_array_xor: + * @left: A left hand side #GArrowBooleanArray. + * @right: A right hand side #GArrowBooleanArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The element-wise XOR operated boolean array. + * + * It should be freed with g_object_unref() when no longer needed. + * + * Since: 0.13.0 + */ +GArrowBooleanArray * +garrow_boolean_array_xor(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error) +{ + auto arrow_left = garrow_array_get_raw(GARROW_ARRAY(left)); + auto arrow_right = garrow_array_get_raw(GARROW_ARRAY(right)); + auto arrow_operated_datum = arrow::compute::Xor(arrow_left, arrow_right); + if (garrow::check(error, arrow_operated_datum, "[boolean-array][xor]")) { + auto arrow_operated_array = (*arrow_operated_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_operated_array)); + } else { + return NULL; + } +} + + +/** + * garrow_numeric_array_mean: + * @array: A #GArrowNumericArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed mean. 
+ * + * Since: 0.13.0 + */ +gdouble +garrow_numeric_array_mean(GArrowNumericArray *array, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_mean_datum = arrow::compute::Mean(arrow_array); + if (garrow::check(error, arrow_mean_datum, "[numeric-array][mean]")) { + using ScalarType = typename arrow::TypeTraits<arrow::DoubleType>::ScalarType; + auto arrow_numeric_scalar = + std::dynamic_pointer_cast<ScalarType>((*arrow_mean_datum).scalar()); + if (arrow_numeric_scalar->is_valid) { + return arrow_numeric_scalar->value; + } else { + return 0.0; + } + } else { + return 0.0; + } +} + + +/** + * garrow_int8_array_sum: + * @array: A #GArrowInt8Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +gint64 +garrow_int8_array_sum(GArrowInt8Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::Int64Type>(array, + error, + "[int8-array][sum]", + 0); +} + +/** + * garrow_uint8_array_sum: + * @array: A #GArrowUInt8Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +guint64 +garrow_uint8_array_sum(GArrowUInt8Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::UInt64Type>(array, + error, + "[uint8-array][sum]", + 0); +} + +/** + * garrow_int16_array_sum: + * @array: A #GArrowInt16Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. 
+ * + * Since: 0.13.0 + */ +gint64 +garrow_int16_array_sum(GArrowInt16Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::Int64Type>(array, + error, + "[int16-array][sum]", + 0); +} + +/** + * garrow_uint16_array_sum: + * @array: A #GArrowUInt16Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +guint64 +garrow_uint16_array_sum(GArrowUInt16Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::UInt64Type>(array, + error, + "[uint16-array][sum]", + 0); +} + +/** + * garrow_int32_array_sum: + * @array: A #GArrowInt32Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +gint64 +garrow_int32_array_sum(GArrowInt32Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::Int64Type>(array, + error, + "[int32-array][sum]", + 0); +} + +/** + * garrow_uint32_array_sum: + * @array: A #GArrowUInt32Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +guint64 +garrow_uint32_array_sum(GArrowUInt32Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::UInt64Type>(array, + error, + "[uint32-array][sum]", + 0); +} + +/** + * garrow_int64_array_sum: + * @array: A #GArrowInt64Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. 
+ * + * Since: 0.13.0 + */ +gint64 +garrow_int64_array_sum(GArrowInt64Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::Int64Type>(array, + error, + "[int64-array][sum]", + 0); +} + +/** + * garrow_uint64_array_sum: + * @array: A #GArrowUInt64Array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +guint64 +garrow_uint64_array_sum(GArrowUInt64Array *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::UInt64Type>(array, + error, + "[uint64-array][sum]", + 0); +} + +/** + * garrow_float_array_sum: + * @array: A #GArrowFloatArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +gdouble +garrow_float_array_sum(GArrowFloatArray *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::DoubleType>(array, + error, + "[float-array][sum]", + 0); +} + +/** + * garrow_double_array_sum: + * @array: A #GArrowDoubleArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The value of the computed sum on success, + * If an error is occurred, the returned value is untrustful value. + * + * Since: 0.13.0 + */ +gdouble +garrow_double_array_sum(GArrowDoubleArray *array, + GError **error) +{ + return garrow_numeric_array_sum<arrow::DoubleType>(array, + error, + "[double-array][sum]", + 0); +} + +/** + * garrow_array_take: + * @array: A #GArrowArray. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowArray taken from + * an array of values at indices in input array or %NULL on error. 
+ * + * Since: 0.14.0 + */ +GArrowArray * +garrow_array_take(GArrowArray *array, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_indices = garrow_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_array), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_array = arrow_datum.make_array(); + return garrow_array_new_raw(&arrow_taken_array); + }, + error, + "[array][take][array]"); +} + +/** + * garrow_array_take_chunked_array: + * @array: A #GArrowArray. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray taken from + * an array of values at indices in chunked array or %NULL on error. + * + * Since: 0.16.0 + */ +GArrowChunkedArray * +garrow_array_take_chunked_array(GArrowArray *array, + GArrowChunkedArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_indices = garrow_chunked_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_array), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_chunked_array = arrow_datum.chunked_array(); + return garrow_chunked_array_new_raw(&arrow_taken_chunked_array); + }, + error, + "[array][take][chunked-array]"); +} + +/** + * garrow_table_take: + * @table: A #GArrowTable. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable taken from + * an array of values at indices in input array or %NULL on error. 
+ * + * Since: 0.16.0 + */ +GArrowTable * +garrow_table_take(GArrowTable *table, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_indices = garrow_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_table), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_table = arrow_datum.table(); + return garrow_table_new_raw(&arrow_taken_table); + }, + error, + "[table][take]"); +} + +/** + * garrow_table_take_chunked_array: + * @table: A #GArrowTable. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable taken from + * an array of values at indices in chunked array or %NULL on error. + * + * Since: 0.16.0 + */ +GArrowTable * +garrow_table_take_chunked_array(GArrowTable *table, + GArrowChunkedArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_indices = garrow_chunked_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_table), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_table = arrow_datum.table(); + return garrow_table_new_raw(&arrow_taken_table); + }, + error, + "[table][take][chunked-array]"); +} + +/** + * garrow_chunked_array_take: + * @chunked_array: A #GArrowChunkedArray. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray taken from + * an array of values at indices in input array or %NULL on error. 
+ * + * Since: 0.16.0 + */ +GArrowChunkedArray * +garrow_chunked_array_take(GArrowChunkedArray *chunked_array, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_indices = garrow_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_chunked_array), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_chunked_array = arrow_datum.chunked_array(); + return garrow_chunked_array_new_raw(&arrow_taken_chunked_array); + }, + error, + "[chunked-array][take]"); +} + +/** + * garrow_chunked_array_take_chunked_array: + * @chunked_array: A #GArrowChunkedArray. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray taken from + * an array of values at indices in chunked array or %NULL on error. + * + * Since: 0.16.0 + */ +GArrowChunkedArray * +garrow_chunked_array_take_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_indices = garrow_chunked_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_chunked_array), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_chunked_array = arrow_datum.chunked_array(); + return garrow_chunked_array_new_raw(&arrow_taken_chunked_array); + }, + error, + "[chunked-array][take][chunked-array]"); +} + +/** + * garrow_record_batch_take: + * @record_batch: A #GArrowRecordBatch. + * @indices: The indices of values to take. + * @options: (nullable): A #GArrowTakeOptions. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray taken from + * an array of values at indices in input array or %NULL on error. + * + * Since: 0.16.0 + */ +GArrowRecordBatch * +garrow_record_batch_take(GArrowRecordBatch *record_batch, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error) +{ + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_indices = garrow_array_get_raw(indices); + return garrow_take( + arrow::Datum(arrow_record_batch), + arrow::Datum(arrow_indices), + options, + [](arrow::Datum arrow_datum) { + auto arrow_taken_record_batch = arrow_datum.record_batch(); + return garrow_record_batch_new_raw(&arrow_taken_record_batch); + }, + error, + "[record-batch][take]"); +} + +/** + * garrow_array_filter: + * @array: A #GArrowArray. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowArray filterd + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowArray * +garrow_array_filter(GArrowArray *array, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_array, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_array, + arrow_filter); + } + if (garrow::check(error, arrow_filtered_datum, "[array][filter]")) { + auto arrow_filtered_array = (*arrow_filtered_datum).make_array(); + return garrow_array_new_raw(&arrow_filtered_array); + } else { + return NULL; + } +} + +/** + * garrow_array_is_in: + * @left: A left hand side #GArrowArray. + * @right: A right hand side #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowBooleanArray + * showing whether each element in the left array is contained + * in right array. + * + * Since: 0.15.0 + */ +GArrowBooleanArray * +garrow_array_is_in(GArrowArray *left, + GArrowArray *right, + GError **error) +{ + auto arrow_left = garrow_array_get_raw(left); + auto arrow_right = garrow_array_get_raw(right); + auto arrow_is_in_datum = arrow::compute::IsIn(arrow_left, arrow_right); + if (garrow::check(error, arrow_is_in_datum, "[array][is-in]")) { + auto arrow_is_in_array = (*arrow_is_in_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_is_in_array)); + } else { + return NULL; + } +} + +/** + * garrow_array_is_in_chunked_array: + * @left: A left hand side #GArrowArray. + * @right: A right hand side #GArrowChunkedArray. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable) (transfer full): The #GArrowBooleanArray + * showing whether each element in the left array is contained + * in right chunked array. + * + * Since: 0.15.0 + */ +GArrowBooleanArray * +garrow_array_is_in_chunked_array(GArrowArray *left, + GArrowChunkedArray *right, + GError **error) +{ + auto arrow_left = garrow_array_get_raw(left); + auto arrow_right = garrow_chunked_array_get_raw(right); + auto arrow_is_in_datum = arrow::compute::IsIn(arrow_left, arrow_right); + if (garrow::check(error, + arrow_is_in_datum, + "[array][is-in][chunked-array]")) { + auto arrow_is_in_array = (*arrow_is_in_datum).make_array(); + return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_is_in_array)); + } else { + return NULL; + } +} + +/** + * garrow_array_sort_indices: + * @array: A #GArrowArray. + * @order: The order for sort. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The indices that would sort + * an array in the specified order on success, %NULL on error. + * + * Since: 3.0.0 + */ +GArrowUInt64Array * +garrow_array_sort_indices(GArrowArray *array, + GArrowSortOrder order, + GError **error) +{ + auto arrow_array = garrow_array_get_raw(array); + auto arrow_array_raw = arrow_array.get(); + auto arrow_order = static_cast<arrow::compute::SortOrder>(order); + auto arrow_indices_array = + arrow::compute::SortIndices(*arrow_array_raw, arrow_order); + if (garrow::check(error, arrow_indices_array, "[array][sort-indices]")) { + return GARROW_UINT64_ARRAY(garrow_array_new_raw(&(*arrow_indices_array))); + } else { + return NULL; + } +} + +/** + * garrow_array_sort_to_indices: + * @array: A #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The indices that would sort + * an array in ascending order on success, %NULL on error. + * + * Since: 0.15.0 + * + * Deprecated: 3.0.0: Use garrow_array_sort_indices() instead. 
+ */ +GArrowUInt64Array * +garrow_array_sort_to_indices(GArrowArray *array, + GError **error) +{ + return garrow_array_sort_indices(array, GARROW_SORT_ORDER_ASCENDING, error); +} + +/** + * garrow_chunked_array_sort_indices: + * @chunked_array: A #GArrowChunkedArray. + * @order: The order for sort. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The indices that would sort + * a chunked array in the specified order on success, %NULL on error. + * + * Since: 3.0.0 + */ +GArrowUInt64Array * +garrow_chunked_array_sort_indices(GArrowChunkedArray *chunked_array, + GArrowSortOrder order, + GError **error) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_chunked_array_raw = arrow_chunked_array.get(); + auto arrow_order = static_cast<arrow::compute::SortOrder>(order); + auto arrow_indices_array = + arrow::compute::SortIndices(*arrow_chunked_array_raw, arrow_order); + if (garrow::check(error, + arrow_indices_array, + "[chunked-array][sort-indices]")) { + return GARROW_UINT64_ARRAY(garrow_array_new_raw(&(*arrow_indices_array))); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_sort_indices: + * @record_batch: A #GArrowRecordBatch. + * @options: The options to be used. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The indices that would sort + * a record batch with the specified options on success, %NULL on error. 
+ * + * Since: 3.0.0 + */ +GArrowUInt64Array * +garrow_record_batch_sort_indices(GArrowRecordBatch *record_batch, + GArrowSortOptions *options, + GError **error) +{ + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_record_batch_raw = arrow_record_batch.get(); + auto arrow_options = garrow_sort_options_get_raw(options); + auto arrow_indices_array = + arrow::compute::SortIndices(::arrow::Datum(*arrow_record_batch_raw), + *arrow_options); + if (garrow::check(error, + arrow_indices_array, + "[record-batch][sort-indices]")) { + return GARROW_UINT64_ARRAY(garrow_array_new_raw(&(*arrow_indices_array))); + } else { + return NULL; + } +} + +/** + * garrow_table_sort_indices: + * @table: A #GArrowTable. + * @options: The options to be used. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The indices that would sort + * a table with the specified options on success, %NULL on error. + * + * Since: 3.0.0 + */ +GArrowUInt64Array * +garrow_table_sort_indices(GArrowTable *table, + GArrowSortOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_table_raw = arrow_table.get(); + auto arrow_options = garrow_sort_options_get_raw(options); + auto arrow_indices_array = + arrow::compute::SortIndices(::arrow::Datum(*arrow_table_raw), + *arrow_options); + if (garrow::check(error, + arrow_indices_array, + "[table][sort-indices]")) { + return GARROW_UINT64_ARRAY(garrow_array_new_raw(&(*arrow_indices_array))); + } else { + return NULL; + } +} + +/** + * garrow_table_filter: + * @table: A #GArrowTable. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable filterd + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowTable * +garrow_table_filter(GArrowTable *table, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_table, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_table, + arrow_filter); + } + if (garrow::check(error, arrow_filtered_datum, "[table][filter]")) { + auto arrow_filtered_table = (*arrow_filtered_datum).table(); + return garrow_table_new_raw(&arrow_filtered_table); + } else { + return NULL; + } +} + +/** + * garrow_table_filter_chunked_array: + * @table: A #GArrowTable. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable filterd + * with a chunked array filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowTable * +garrow_table_filter_chunked_array(GArrowTable *table, + GArrowChunkedArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_filter = garrow_chunked_array_get_raw(filter); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_table, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_table, + arrow_filter); + } + if (garrow::check(error, + arrow_filtered_datum, + "[table][filter][chunked-array]")) { + auto arrow_filtered_table = (*arrow_filtered_datum).table(); + return garrow_table_new_raw(&arrow_filtered_table); + } else { + return NULL; + } +} + +/** + * garrow_chunked_array_filter: + * @chunked_array: A #GArrowChunkedArray. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filterd + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowChunkedArray * +garrow_chunked_array_filter(GArrowChunkedArray *chunked_array, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_chunked_array, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_chunked_array, + arrow_filter); + } + if (garrow::check(error, arrow_filtered_datum, "[chunked-array][filter]")) { + auto arrow_filtered_chunked_array = (*arrow_filtered_datum).chunked_array(); + return garrow_chunked_array_new_raw(&arrow_filtered_chunked_array); + } else { + return NULL; + } +} + +/** + * garrow_chunked_array_filter_chunked_array: + * @chunked_array: A #GArrowChunkedArray. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filterd + * with a chunked array filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowChunkedArray * +garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + auto arrow_filter = garrow_chunked_array_get_raw(filter); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_chunked_array, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_chunked_array, + arrow_filter); + } + if (garrow::check(error, + arrow_filtered_datum, + "[chunked-array][filter][chunked-array]")) { + auto arrow_filtered_chunked_array = (*arrow_filtered_datum).chunked_array(); + return garrow_chunked_array_new_raw(&arrow_filtered_chunked_array); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_filter: + * @record_batch: A #GArrowRecordBatch. + * @filter: The values indicates which values should be filtered out. + * @options: (nullable): A #GArrowFilterOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowRecordBatch filterd + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 0.15.0 + */ +GArrowRecordBatch * +garrow_record_batch_filter(GArrowRecordBatch *record_batch, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error) +{ + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + arrow::Result<arrow::Datum> arrow_filtered_datum; + if (options) { + auto arrow_options = garrow_filter_options_get_raw(options); + arrow_filtered_datum = arrow::compute::Filter(arrow_record_batch, + arrow_filter, + *arrow_options); + } else { + arrow_filtered_datum = arrow::compute::Filter(arrow_record_batch, + arrow_filter); + } + if (garrow::check(error, arrow_filtered_datum, "[record-batch][filter]")) { + auto arrow_filtered_record_batch = (*arrow_filtered_datum).record_batch(); + return garrow_record_batch_new_raw(&arrow_filtered_record_batch); + } else { + return NULL; + } +} + +G_END_DECLS + +arrow::compute::ExecContext * +garrow_execute_context_get_raw(GArrowExecuteContext *context) +{ + auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(context); + return &priv->context; +} + +arrow::compute::FunctionOptions * +garrow_function_options_get_raw(GArrowFunctionOptions *options) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(options); + return priv->options; +} + + +GArrowFunctionDoc * +garrow_function_doc_new_raw(const arrow::compute::FunctionDoc *arrow_doc) +{ + return GARROW_FUNCTION_DOC(g_object_new(GARROW_TYPE_FUNCTION_DOC, + "doc", arrow_doc, + NULL)); +} + +arrow::compute::FunctionDoc * +garrow_function_doc_get_raw(GArrowFunctionDoc *doc) +{ + auto priv = GARROW_FUNCTION_DOC_GET_PRIVATE(doc); + return priv->doc; +} + + +GArrowFunction * +garrow_function_new_raw(std::shared_ptr<arrow::compute::Function> *arrow_function) +{ + return GARROW_FUNCTION(g_object_new(GARROW_TYPE_FUNCTION, + "function", arrow_function, + NULL)); +} + +std::shared_ptr<arrow::compute::Function> +garrow_function_get_raw(GArrowFunction *function) +{ + auto 
priv = GARROW_FUNCTION_GET_PRIVATE(function); + return priv->function; +} + +/* Builds a GArrowExecuteNodeOptions with g_object_new(), handing @arrow_options to the `options` property. */ +GArrowExecuteNodeOptions * +garrow_execute_node_options_new_raw( + arrow::compute::ExecNodeOptions *arrow_options) +{ + return GARROW_EXECUTE_NODE_OPTIONS( + g_object_new(GARROW_TYPE_EXECUTE_NODE_OPTIONS, + "options", arrow_options, + NULL)); +} +/* Returns the ExecNodeOptions pointer stored in the private data of @options. */ +arrow::compute::ExecNodeOptions * +garrow_execute_node_options_get_raw(GArrowExecuteNodeOptions *options) +{ + auto priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(options); + return priv->options; +} + +/* Builds a GArrowExecuteNode with g_object_new(), handing @arrow_node to the `node` property. */ +GArrowExecuteNode * +garrow_execute_node_new_raw(arrow::compute::ExecNode *arrow_node) +{ + return GARROW_EXECUTE_NODE(g_object_new(GARROW_TYPE_EXECUTE_NODE, + "node", arrow_node, + NULL)); +} +/* Returns the ExecNode pointer stored in the private data of @node. */ +arrow::compute::ExecNode * +garrow_execute_node_get_raw(GArrowExecuteNode *node) +{ + auto priv = GARROW_EXECUTE_NODE_GET_PRIVATE(node); + return priv->node; +} + +/* Returns the `plan` member of the private data of @plan as a std::shared_ptr by value. */ +std::shared_ptr<arrow::compute::ExecPlan> +garrow_execute_plan_get_raw(GArrowExecutePlan *plan) +{ + auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(plan); + return priv->plan; +} + +/* Builds a GArrowCastOptions whose properties are filled from the fields of @arrow_options. */ +GArrowCastOptions * +garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_options) +{ + GArrowDataType *to_data_type = NULL; +/* Wrap to_type only when it is set; otherwise the `to-data-type` property receives NULL. */ if (arrow_options->to_type) { + to_data_type = garrow_data_type_new_raw(&(arrow_options->to_type)); + } +/* NOTE(review): to_data_type is never unreffed in this function; confirm whether the `to-data-type` property takes ownership of that reference. */ auto options = + g_object_new(GARROW_TYPE_CAST_OPTIONS, + "to-data-type", to_data_type, + "allow-int-overflow", arrow_options->allow_int_overflow, + "allow-time-truncate", arrow_options->allow_time_truncate, + "allow-time-overflow", arrow_options->allow_time_overflow, + "allow-decimal-truncate", arrow_options->allow_decimal_truncate, + "allow-float-truncate", arrow_options->allow_float_truncate, + "allow-invalid-utf8", arrow_options->allow_invalid_utf8, + NULL); + return GARROW_CAST_OPTIONS(options); +} +/* Returns the result of garrow_function_options_get_raw() cast to a CastOptions pointer with static_cast. */ +arrow::compute::CastOptions * +garrow_cast_options_get_raw(GArrowCastOptions *options) +{ + return static_cast<arrow::compute::CastOptions *>( + 
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Builds a GArrowScalarAggregateOptions whose `skip-nulls` and `min-count` properties are taken from @arrow_options. */ +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new_raw( + arrow::compute::ScalarAggregateOptions *arrow_options) +{ + auto options = + g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS, + "skip-nulls", arrow_options->skip_nulls, + "min-count", arrow_options->min_count, + NULL); + return GARROW_SCALAR_AGGREGATE_OPTIONS(options); +} +/* The options accessors from here on share one pattern: fetch the generic FunctionOptions pointer and static_cast it to the concrete options type. */ +arrow::compute::ScalarAggregateOptions * +garrow_scalar_aggregate_options_get_raw(GArrowScalarAggregateOptions *options) +{ + return static_cast<arrow::compute::ScalarAggregateOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Returns the raw FunctionOptions pointer of @options cast to a CountOptions pointer. */ +arrow::compute::CountOptions * +garrow_count_options_get_raw(GArrowCountOptions *options) +{ + return static_cast<arrow::compute::CountOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Returns the raw FunctionOptions pointer of @options cast to a FilterOptions pointer. */ +arrow::compute::FilterOptions * +garrow_filter_options_get_raw(GArrowFilterOptions *options) +{ + return static_cast<arrow::compute::FilterOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Returns the raw FunctionOptions pointer of @options cast to a TakeOptions pointer. */ +arrow::compute::TakeOptions * +garrow_take_options_get_raw(GArrowTakeOptions *options) +{ + return static_cast<arrow::compute::TakeOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Returns the raw FunctionOptions pointer of @options cast to an ArraySortOptions pointer. */ +arrow::compute::ArraySortOptions * +garrow_array_sort_options_get_raw(GArrowArraySortOptions *options) +{ + return static_cast<arrow::compute::ArraySortOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} +/* Unlike the options accessors, this returns the address of the SortKey member stored in the private struct of @sort_key. */ +arrow::compute::SortKey * +garrow_sort_key_get_raw(GArrowSortKey *sort_key) +{ + auto priv = GARROW_SORT_KEY_GET_PRIVATE(sort_key); + return &(priv->sort_key); +} +/* Returns the raw FunctionOptions pointer of @options cast to a SortOptions pointer. */ +arrow::compute::SortOptions * +garrow_sort_options_get_raw(GArrowSortOptions *options) +{ + return static_cast<arrow::compute::SortOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} + 
+arrow::compute::SetLookupOptions * +garrow_set_lookup_options_get_raw(GArrowSetLookupOptions *options) +{ +/* Fetch the generic FunctionOptions pointer of @options and static_cast it to the SetLookupOptions type. */ return static_cast<arrow::compute::SetLookupOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} + +/* Returns the raw FunctionOptions pointer of @options cast to a VarianceOptions pointer. */ +arrow::compute::VarianceOptions * +garrow_variance_options_get_raw(GArrowVarianceOptions *options) +{ + return static_cast<arrow::compute::VarianceOptions *>( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/src/arrow/c_glib/arrow-glib/compute.h b/src/arrow/c_glib/arrow-glib/compute.h new file mode 100644 index 000000000..2171d6abd --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/compute.h @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/datum.h> +#include <arrow-glib/reader.h> + +G_BEGIN_DECLS +/* GArrowExecuteContext: derivable type whose parent is GObject. */ +#define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExecuteContext, + garrow_execute_context, + GARROW, + EXECUTE_CONTEXT, + GObject) +struct _GArrowExecuteContextClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowExecuteContext *garrow_execute_context_new(void); + +/* GArrowFunctionOptions: derivable GObject type; the concrete option types declared later in this header name it as their parent. */ +#define GARROW_TYPE_FUNCTION_OPTIONS (garrow_function_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFunctionOptions, + garrow_function_options, + GARROW, + FUNCTION_OPTIONS, + GObject) +struct _GArrowFunctionOptionsClass +{ + GObjectClass parent_class; +}; + +/* GArrowFunctionDoc: derivable GObject type; its getters return gchar pointers (gchar ** for the argument names). */ +#define GARROW_TYPE_FUNCTION_DOC (garrow_function_doc_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFunctionDoc, + garrow_function_doc, + GARROW, + FUNCTION_DOC, + GObject) +struct _GArrowFunctionDocClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +gchar * +garrow_function_doc_get_summary(GArrowFunctionDoc *doc); +GARROW_AVAILABLE_IN_6_0 +gchar * +garrow_function_doc_get_description(GArrowFunctionDoc *doc); +GARROW_AVAILABLE_IN_6_0 +gchar ** +garrow_function_doc_get_arg_names(GArrowFunctionDoc *doc); +GARROW_AVAILABLE_IN_6_0 +gchar * +garrow_function_doc_get_options_class_name(GArrowFunctionDoc *doc); + +/* GArrowFunction: derivable GObject type; looked up by name with garrow_function_find() and run with garrow_function_execute(). */ +#define GARROW_TYPE_FUNCTION (garrow_function_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFunction, + garrow_function, + GARROW, + FUNCTION, + GObject) +struct _GArrowFunctionClass +{ + GObjectClass parent_class; +}; + + +GARROW_AVAILABLE_IN_1_0 +GArrowFunction *garrow_function_find(const gchar *name); + +GARROW_AVAILABLE_IN_1_0 +GArrowDatum *garrow_function_execute(GArrowFunction *function, + GList *args, + GArrowFunctionOptions *options, + GArrowExecuteContext *context, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GArrowFunctionDoc * +garrow_function_get_doc(GArrowFunction *function); + + +#define GARROW_TYPE_EXECUTE_NODE_OPTIONS 
(garrow_execute_node_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExecuteNodeOptions, + garrow_execute_node_options, + GARROW, + EXECUTE_NODE_OPTIONS, + GObject) +struct _GArrowExecuteNodeOptionsClass +{ + GObjectClass parent_class; +}; + +/* GArrowSourceNodeOptions derives from GArrowExecuteNodeOptions; constructors are declared for a record batch reader, a single record batch and a table. */ +#define GARROW_TYPE_SOURCE_NODE_OPTIONS (garrow_source_node_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSourceNodeOptions, + garrow_source_node_options, + GARROW, + SOURCE_NODE_OPTIONS, + GArrowExecuteNodeOptions) +struct _GArrowSourceNodeOptionsClass +{ + GArrowExecuteNodeOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowSourceNodeOptions * +garrow_source_node_options_new_record_batch_reader( + GArrowRecordBatchReader *reader); +GARROW_AVAILABLE_IN_6_0 +GArrowSourceNodeOptions * +garrow_source_node_options_new_record_batch(GArrowRecordBatch *record_batch); +GARROW_AVAILABLE_IN_6_0 +GArrowSourceNodeOptions * +garrow_source_node_options_new_table(GArrowTable *table); + +/* GArrowAggregation: derivable GObject type; its constructor takes a function name, optional-looking function options, and input and output names (NOTE(review): whether @options may be NULL is not visible here). */ +#define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowAggregation, + garrow_aggregation, + GARROW, + AGGREGATION, + GObject) +struct _GArrowAggregationClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowAggregation * +garrow_aggregation_new(const gchar *function, + GArrowFunctionOptions *options, + const gchar *input, + const gchar *output); +/* GArrowAggregateNodeOptions derives from GArrowExecuteNodeOptions; it is built from a GList of aggregations plus an array of n_keys key strings. */ +#define GARROW_TYPE_AGGREGATE_NODE_OPTIONS \ + (garrow_aggregate_node_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowAggregateNodeOptions, + garrow_aggregate_node_options, + GARROW, + AGGREGATE_NODE_OPTIONS, + GArrowExecuteNodeOptions) +struct _GArrowAggregateNodeOptionsClass +{ + GArrowExecuteNodeOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowAggregateNodeOptions * +garrow_aggregate_node_options_new(GList *aggregations, + const gchar **keys, + gsize n_keys, + GError **error); + + +#define GARROW_TYPE_SINK_NODE_OPTIONS (garrow_sink_node_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSinkNodeOptions, 
+ garrow_sink_node_options, + GARROW, + SINK_NODE_OPTIONS, + GArrowExecuteNodeOptions) +struct _GArrowSinkNodeOptionsClass +{ + GArrowExecuteNodeOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowSinkNodeOptions * +garrow_sink_node_options_new(void); +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatchReader * +garrow_sink_node_options_get_reader(GArrowSinkNodeOptions *options, + GArrowSchema *schema); + +/* GArrowExecuteNode: derivable GObject type exposing the kind name and the output schema of a node. */ +#define GARROW_TYPE_EXECUTE_NODE (garrow_execute_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExecuteNode, + garrow_execute_node, + GARROW, + EXECUTE_NODE, + GObject) +struct _GArrowExecuteNodeClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +const gchar * +garrow_execute_node_get_kind_name(GArrowExecuteNode *node); +GARROW_AVAILABLE_IN_6_0 +GArrowSchema * +garrow_execute_node_get_output_schema(GArrowExecuteNode *node); + +/* GArrowExecutePlan: derivable GObject type. build_node() takes a factory name and a GList of inputs; the source, aggregate and sink variants take the matching typed options instead. */ +#define GARROW_TYPE_EXECUTE_PLAN (garrow_execute_plan_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExecutePlan, + garrow_execute_plan, + GARROW, + EXECUTE_PLAN, + GObject) +struct _GArrowExecutePlanClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowExecutePlan * +garrow_execute_plan_new(GError **error); +GARROW_AVAILABLE_IN_6_0 +GArrowExecuteNode * +garrow_execute_plan_build_node(GArrowExecutePlan *plan, + const gchar *factory_name, + GList *inputs, + GArrowExecuteNodeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GArrowExecuteNode * +garrow_execute_plan_build_source_node(GArrowExecutePlan *plan, + GArrowSourceNodeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GArrowExecuteNode * +garrow_execute_plan_build_aggregate_node(GArrowExecutePlan *plan, + GArrowExecuteNode *input, + GArrowAggregateNodeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GArrowExecuteNode * +garrow_execute_plan_build_sink_node(GArrowExecutePlan *plan, + GArrowExecuteNode *input, + GArrowSinkNodeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_6_0 +gboolean 
+garrow_execute_plan_validate(GArrowExecutePlan *plan, + GError **error); +GARROW_AVAILABLE_IN_6_0 +gboolean +garrow_execute_plan_start(GArrowExecutePlan *plan, + GError **error); +GARROW_AVAILABLE_IN_6_0 +void +garrow_execute_plan_stop(GArrowExecutePlan *plan); +GARROW_AVAILABLE_IN_6_0 +void +garrow_execute_plan_wait(GArrowExecutePlan *plan); + +/* GArrowCastOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_CAST_OPTIONS (garrow_cast_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCastOptions, + garrow_cast_options, + GARROW, + CAST_OPTIONS, + GArrowFunctionOptions) +struct _GArrowCastOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; +/* NOTE(review): unlike the neighbouring constructors, this one carries no GARROW_AVAILABLE_IN_* annotation. */ +GArrowCastOptions *garrow_cast_options_new(void); + +/* GArrowScalarAggregateOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS \ + (garrow_scalar_aggregate_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions, + garrow_scalar_aggregate_options, + GARROW, + SCALAR_AGGREGATE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowScalarAggregateOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new(void); + +/** + * GArrowCountMode: + * @GARROW_COUNT_MODE_ONLY_VALID: + * Only non-null values will be counted. + * @GARROW_COUNT_MODE_ONLY_NULL: + * Only null values will be counted. + * @GARROW_COUNT_MODE_ALL: + * All will be counted. + * + * They correspond to the values of `arrow::compute::CountOptions::CountMode`. 
+ */ +typedef enum { + GARROW_COUNT_MODE_ONLY_VALID, + GARROW_COUNT_MODE_ONLY_NULL, + GARROW_COUNT_MODE_ALL, +} GArrowCountMode; +/* GArrowCountOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCountOptions, + garrow_count_options, + GARROW, + COUNT_OPTIONS, + GArrowFunctionOptions) +struct _GArrowCountOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowCountOptions * +garrow_count_options_new(void); + + +/** + * GArrowFilterNullSelectionBehavior: + * @GARROW_FILTER_NULL_SELECTION_DROP: + * Filtered value will be removed in the output. + * @GARROW_FILTER_NULL_SELECTION_EMIT_NULL: + * Filtered value will be null in the output. + * + * They correspond to + * `arrow::compute::FilterOptions::NullSelectionBehavior` values. + */ +typedef enum { + GARROW_FILTER_NULL_SELECTION_DROP, + GARROW_FILTER_NULL_SELECTION_EMIT_NULL, +} GArrowFilterNullSelectionBehavior; +/* GArrowFilterOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_FILTER_OPTIONS (garrow_filter_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFilterOptions, + garrow_filter_options, + GARROW, + FILTER_OPTIONS, + GArrowFunctionOptions) +struct _GArrowFilterOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowFilterOptions * +garrow_filter_options_new(void); + +/* GArrowTakeOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_TAKE_OPTIONS (garrow_take_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTakeOptions, + garrow_take_options, + GARROW, + TAKE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowTakeOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_14 +GArrowTakeOptions * +garrow_take_options_new(void); + + +/** + * GArrowSortOrder: + * @GARROW_SORT_ORDER_ASCENDING: Sort in ascending order. + * @GARROW_SORT_ORDER_DESCENDING: Sort in descending order. + * + * They correspond to `arrow::compute::SortOrder` values. 
+ * + * Since: 3.0.0 + */ +typedef enum { + GARROW_SORT_ORDER_ASCENDING, + GARROW_SORT_ORDER_DESCENDING, +} GArrowSortOrder; +/* GArrowArraySortOptions derives from GArrowFunctionOptions; its constructor takes the sort order. */ +#define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions, + garrow_array_sort_options, + GARROW, + ARRAY_SORT_OPTIONS, + GArrowFunctionOptions) +struct _GArrowArraySortOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowArraySortOptions * +garrow_array_sort_options_new(GArrowSortOrder order); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_array_sort_options_equal(GArrowArraySortOptions *options, + GArrowArraySortOptions *other_options); + +/* GArrowSortKey derives directly from GObject; a key is built from a name and a GArrowSortOrder. */ +#define GARROW_TYPE_SORT_KEY (garrow_sort_key_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSortKey, + garrow_sort_key, + GARROW, + SORT_KEY, + GObject) +struct _GArrowSortKeyClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowSortKey * +garrow_sort_key_new(const gchar *name, GArrowSortOrder order); + +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_sort_key_equal(GArrowSortKey *sort_key, + GArrowSortKey *other_sort_key); + +/* GArrowSortOptions derives from GArrowFunctionOptions; sort keys are passed and returned as a GList. */ +#define GARROW_TYPE_SORT_OPTIONS (garrow_sort_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSortOptions, + garrow_sort_options, + GARROW, + SORT_OPTIONS, + GArrowFunctionOptions) +struct _GArrowSortOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowSortOptions * +garrow_sort_options_new(GList *sort_keys); +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_sort_options_equal(GArrowSortOptions *options, + GArrowSortOptions *other_options); +GARROW_AVAILABLE_IN_3_0 +GList * +garrow_sort_options_get_sort_keys(GArrowSortOptions *options); +GARROW_AVAILABLE_IN_3_0 +void +garrow_sort_options_set_sort_keys(GArrowSortOptions *options, + GList *sort_keys); +GARROW_AVAILABLE_IN_3_0 +void +garrow_sort_options_add_sort_key(GArrowSortOptions *options, + GArrowSortKey *sort_key); + + +#define GARROW_TYPE_SET_LOOKUP_OPTIONS 
(garrow_set_lookup_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSetLookupOptions, + garrow_set_lookup_options, + GARROW, + SET_LOOKUP_OPTIONS, + GArrowFunctionOptions) +struct _GArrowSetLookupOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowSetLookupOptions * +garrow_set_lookup_options_new(GArrowDatum *value_set); + +/* GArrowVarianceOptions derives from GArrowFunctionOptions. */ +#define GARROW_TYPE_VARIANCE_OPTIONS (garrow_variance_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowVarianceOptions, + garrow_variance_options, + GARROW, + VARIANCE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowVarianceOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowVarianceOptions * +garrow_variance_options_new(void); + +/* Array-level compute entry points. NOTE(review): garrow_array_cast, garrow_array_unique and garrow_array_dictionary_encode carry no GARROW_AVAILABLE_IN_* annotation here. */ +GArrowArray *garrow_array_cast(GArrowArray *array, + GArrowDataType *target_data_type, + GArrowCastOptions *options, + GError **error); +GArrowArray *garrow_array_unique(GArrowArray *array, + GError **error); +GArrowDictionaryArray *garrow_array_dictionary_encode(GArrowArray *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gint64 garrow_array_count(GArrowArray *array, + GArrowCountOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_13 +GArrowStructArray *garrow_array_count_values(GArrowArray *array, + GError **error); +/* Boolean array helpers: invert is unary; and, or and xor take a left and a right operand. All return a GArrowBooleanArray. */ +GARROW_AVAILABLE_IN_0_13 +GArrowBooleanArray *garrow_boolean_array_invert(GArrowBooleanArray *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +GArrowBooleanArray *garrow_boolean_array_and(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error); +GARROW_AVAILABLE_IN_0_13 +GArrowBooleanArray *garrow_boolean_array_or(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error); +GARROW_AVAILABLE_IN_0_13 +GArrowBooleanArray *garrow_boolean_array_xor(GArrowBooleanArray *left, + GArrowBooleanArray *right, + GError **error); + +GARROW_AVAILABLE_IN_0_13 +gdouble garrow_numeric_array_mean(GArrowNumericArray *array, + GError **error); +/* Per-type sum helpers follow: signed integer arrays return gint64, unsigned ones guint64, float and double arrays gdouble. */ +GARROW_AVAILABLE_IN_0_13 
+gint64 garrow_int8_array_sum(GArrowInt8Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +guint64 garrow_uint8_array_sum(GArrowUInt8Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gint64 garrow_int16_array_sum(GArrowInt16Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +guint64 garrow_uint16_array_sum(GArrowUInt16Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gint64 garrow_int32_array_sum(GArrowInt32Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +guint64 garrow_uint32_array_sum(GArrowUInt32Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gint64 garrow_int64_array_sum(GArrowInt64Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +guint64 garrow_uint64_array_sum(GArrowUInt64Array *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gdouble garrow_float_array_sum(GArrowFloatArray *array, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gdouble garrow_double_array_sum(GArrowDoubleArray *array, + GError **error); +GARROW_AVAILABLE_IN_0_14 +GArrowArray *garrow_array_take(GArrowArray *array, +/* indices given as a GArrowArray; this variant returns a GArrowArray */ GArrowArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowChunkedArray * +garrow_array_take_chunked_array(GArrowArray *array, +/* indices given as a GArrowChunkedArray; this variant returns a GArrowChunkedArray */ GArrowChunkedArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowTable * +garrow_table_take(GArrowTable *table, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowTable * +garrow_table_take_chunked_array(GArrowTable *table, +/* both table variants return a GArrowTable whatever the type of the indices */ GArrowChunkedArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowChunkedArray * +garrow_chunked_array_take(GArrowChunkedArray *chunked_array, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowChunkedArray * +garrow_chunked_array_take_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *indices, + 
GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowRecordBatch * +garrow_record_batch_take(GArrowRecordBatch *record_batch, + GArrowArray *indices, + GArrowTakeOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowArray * +garrow_array_filter(GArrowArray *array, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowBooleanArray * +garrow_array_is_in(GArrowArray *left, + GArrowArray *right, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowBooleanArray * +garrow_array_is_in_chunked_array(GArrowArray *left, + GArrowChunkedArray *right, + GError **error); + +/* Sort-index helpers: each returns a GArrowUInt64Array. garrow_array_sort_to_indices is marked deprecated in favour of garrow_array_sort_indices. */ +GARROW_AVAILABLE_IN_3_0 +GArrowUInt64Array * +garrow_array_sort_indices(GArrowArray *array, + GArrowSortOrder order, + GError **error); +GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_sort_indices) +GARROW_AVAILABLE_IN_0_15 +GArrowUInt64Array * +garrow_array_sort_to_indices(GArrowArray *array, + GError **error); + +GARROW_AVAILABLE_IN_3_0 +GArrowUInt64Array * +garrow_chunked_array_sort_indices(GArrowChunkedArray *chunked_array, + GArrowSortOrder order, + GError **error); + +/* The record batch and table variants take a GArrowSortOptions instead of a single GArrowSortOrder. */ +GARROW_AVAILABLE_IN_3_0 +GArrowUInt64Array * +garrow_record_batch_sort_indices(GArrowRecordBatch *record_batch, + GArrowSortOptions *options, + GError **error); + +GARROW_AVAILABLE_IN_3_0 +GArrowUInt64Array * +garrow_table_sort_indices(GArrowTable *table, + GArrowSortOptions *options, + GError **error); + +/* Filter helpers: the filter is either a GArrowBooleanArray or, in the *_chunked_array variants, a GArrowChunkedArray. */ +GARROW_AVAILABLE_IN_0_16 +GArrowTable * +garrow_table_filter(GArrowTable *table, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowTable * +garrow_table_filter_chunked_array(GArrowTable *table, + GArrowChunkedArray *filter, + GArrowFilterOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowChunkedArray * +garrow_chunked_array_filter(GArrowChunkedArray *chunked_array, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error); 
+GARROW_AVAILABLE_IN_0_16 +GArrowChunkedArray * +garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *filter, + GArrowFilterOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_16 +GArrowRecordBatch * +garrow_record_batch_filter(GArrowRecordBatch *record_batch, + GArrowBooleanArray *filter, + GArrowFilterOptions *options, + GError **error); +/* NOTE(review): garrow_record_batch_filter is annotated GARROW_AVAILABLE_IN_0_16 here, while the doc comment on its definition in compute.cpp says Since: 0.15.0 - confirm which version is right. */ +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/compute.hpp b/src/arrow/c_glib/arrow-glib/compute.hpp new file mode 100644 index 000000000..88f55d532 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/compute.hpp @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/compute/api.h> + +#include <arrow-glib/compute.h> + +/* C++-only companions to compute.h: each *_get_raw() returns the arrow::compute object behind a wrapper, each *_new_raw() builds a wrapper from one. */ +arrow::compute::ExecContext * +garrow_execute_context_get_raw(GArrowExecuteContext *context); + +arrow::compute::FunctionOptions * +garrow_function_options_get_raw(GArrowFunctionOptions *options); +/* FunctionDoc is passed and returned as a plain pointer. */ +GArrowFunctionDoc * +garrow_function_doc_new_raw(const arrow::compute::FunctionDoc *arrow_doc); +arrow::compute::FunctionDoc * +garrow_function_doc_get_raw(GArrowFunctionDoc *doc); +/* Function is passed as a pointer to a shared_ptr and returned as a shared_ptr by value. */ +GArrowFunction * +garrow_function_new_raw(std::shared_ptr<arrow::compute::Function> *arrow_function); +std::shared_ptr<arrow::compute::Function> +garrow_function_get_raw(GArrowFunction *function); + + +GArrowExecuteNodeOptions * +garrow_execute_node_options_new_raw( + arrow::compute::ExecNodeOptions *arrow_options); +arrow::compute::ExecNodeOptions * +garrow_execute_node_options_get_raw(GArrowExecuteNodeOptions *options); + + +GArrowExecuteNode * +garrow_execute_node_new_raw(arrow::compute::ExecNode *arrow_node); +arrow::compute::ExecNode * +garrow_execute_node_get_raw(GArrowExecuteNode *node); + +/* Only a getter is declared for the plan; it returns a shared_ptr by value. */ +std::shared_ptr<arrow::compute::ExecPlan> +garrow_execute_plan_get_raw(GArrowExecutePlan *plan); + + +GArrowCastOptions * +garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_options); +arrow::compute::CastOptions * +garrow_cast_options_get_raw(GArrowCastOptions *options); + + +GArrowScalarAggregateOptions * +garrow_scalar_aggregate_options_new_raw( + arrow::compute::ScalarAggregateOptions *arrow_options); +arrow::compute::ScalarAggregateOptions * +garrow_scalar_aggregate_options_get_raw( + GArrowScalarAggregateOptions *options); + +/* The remaining option types declare only a *_get_raw() accessor. */ +arrow::compute::CountOptions * +garrow_count_options_get_raw(GArrowCountOptions *options); + + +arrow::compute::FilterOptions * +garrow_filter_options_get_raw(GArrowFilterOptions *options); + + +arrow::compute::TakeOptions * +garrow_take_options_get_raw(GArrowTakeOptions *options); + + +arrow::compute::ArraySortOptions * 
+garrow_array_sort_options_get_raw(GArrowArraySortOptions *options); + +/* Returns a pointer to the arrow::compute::SortKey wrapped by @sort_key. */ +arrow::compute::SortKey * +garrow_sort_key_get_raw(GArrowSortKey *sort_key); + +/* Returns a pointer to the arrow::compute::SortOptions wrapped by @options. */ +arrow::compute::SortOptions * +garrow_sort_options_get_raw(GArrowSortOptions *options); + +/* Returns a pointer to the arrow::compute::SetLookupOptions wrapped by @options. */ +arrow::compute::SetLookupOptions * +garrow_set_lookup_options_get_raw(GArrowSetLookupOptions *options); + +/* Returns a pointer to the arrow::compute::VarianceOptions wrapped by @options. */ +arrow::compute::VarianceOptions * +garrow_variance_options_get_raw(GArrowVarianceOptions *options); diff --git a/src/arrow/c_glib/arrow-glib/data-type.h b/src/arrow/c_glib/arrow-glib/data-type.h new file mode 100644 index 000000000..6067b7234 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/data-type.h @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/basic-data-type.h> +#include <arrow-glib/composite-data-type.h> diff --git a/src/arrow/c_glib/arrow-glib/data-type.hpp b/src/arrow/c_glib/arrow-glib/data-type.hpp new file mode 100644 index 000000000..77ab90321 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/data-type.hpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/basic-data-type.hpp> +#include <arrow-glib/composite-data-type.h> diff --git a/src/arrow/c_glib/arrow-glib/datum.cpp b/src/arrow/c_glib/arrow-glib/datum.cpp new file mode 100644 index 000000000..66993d6c2 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/datum.cpp @@ -0,0 +1,834 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/datum.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/scalar.hpp> +#include <arrow-glib/table.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: datum + * @section_id: datum-classes + * @title: Datum classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowDatum is an abstract class to hold a datum. Subclasses such + * as #GArrowArrayDatum and #GArrowTableDatum can hold a specific + * datum. + * + * #GArrowArrayDatum is a class to hold a #GArrowArray. + * + * #GArrowScalarDatum is a class to hold a #GArrowScalar. + * + * #GArrowChunkedArrayDatum is a class to hold a #GArrowChunkedArray. + * + * #GArrowRecordBatchDatum is a class to hold a #GArrowRecordBatch. + * + * #GArrowTableDatum is a class to hold a #GArrowTable. + */ +/* Instance-private storage: the wrapped arrow::Datum is stored by value. */ +typedef struct GArrowDatumPrivate_ { + arrow::Datum datum; +} GArrowDatumPrivate; + +enum { + PROP_DATUM = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowDatum, garrow_datum, G_TYPE_OBJECT) + +#define GARROW_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowDatumPrivate *>( \ + garrow_datum_get_instance_private( \ + GARROW_DATUM(obj))) +/* GObject finalize handler: destroys the embedded Datum, then chains up. */ +static void +garrow_datum_finalize(GObject *object) +{ + auto priv = GARROW_DATUM_GET_PRIVATE(object); +/* The Datum is created with placement new in garrow_datum_init(), so its destructor is invoked by hand here. */ + priv->datum.~Datum(); +/* Chain up to the finalize of the parent class. */ + G_OBJECT_CLASS(garrow_datum_parent_class)->finalize(object); +} +/* GObject set_property handler; PROP_DATUM is the only property handled. */ +static void +garrow_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATUM: +/* The property value is a pointer to an arrow::Datum; the pointee is copy-assigned into the private struct. */ priv->datum = *static_cast<arrow::Datum *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} +/* Instance initializer: constructs the Datum member in place with placement new. */ +static void +garrow_datum_init(GArrowDatum *object) +{ + auto priv = GARROW_DATUM_GET_PRIVATE(object); + new(&priv->datum) arrow::Datum; +} +/* Class initializer: installs the vfuncs and the datum property. */ +static void +garrow_datum_class_init(GArrowDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + 
gobject_class->finalize = garrow_datum_finalize; + gobject_class->set_property = garrow_datum_set_property; +/* Only set_property is installed: the datum property below is write-only (G_PARAM_WRITABLE) and construct-only. */ + GParamSpec *spec; + spec = g_param_spec_pointer("datum", + "Datum", + "The raw arrow::Datum *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATUM, spec); +} + +/** + * garrow_datum_is_array: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowArray, %FALSE + * otherwise. + * + * Since: 1.0.0 + */ +gboolean +garrow_datum_is_array(GArrowDatum *datum) +{ +/* Delegates to is_array() on the raw datum. */ const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_array(); +} + +/** + * garrow_datum_is_array_like: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowArray or + * #GArrowChunkedArray, %FALSE otherwise. + * + * Since: 1.0.0 + */ +gboolean +garrow_datum_is_array_like(GArrowDatum *datum) +{ +/* Delegates to is_arraylike() on the raw datum. */ const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_arraylike(); +} + +/** + * garrow_datum_is_scalar: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowScalar, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_datum_is_scalar(GArrowDatum *datum) +{ +/* Delegates to is_scalar() on the raw datum. */ const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_scalar(); +} + +/** + * garrow_datum_is_value: + * @datum: A #GArrowDatum. + * + * Returns: %TRUE if the datum holds a #GArrowArray, #GArrowChunkedArray or + * #GArrowScalar, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_datum_is_value(GArrowDatum *datum) +{ +/* Delegates to is_value() on the raw datum. */ const auto &arrow_datum = garrow_datum_get_raw(datum); + return arrow_datum.is_value(); +} + +/** + * garrow_datum_equal: + * @datum: A #GArrowDatum. + * @other_datum: A #GArrowDatum to be compared. + * + * Returns: %TRUE if both of them have the same datum, %FALSE + * otherwise. 
+ * + * Since: 1.0.0 + */ +gboolean +garrow_datum_equal(GArrowDatum *datum, GArrowDatum *other_datum) +{ + const auto &arrow_datum = garrow_datum_get_raw(datum); + const auto &arrow_other_datum = garrow_datum_get_raw(other_datum); + return arrow_datum.Equals(arrow_other_datum); +} + +/** + * garrow_datum_to_string: + * @datum: A #GArrowDatum. + * + * Returns: The formatted datum content. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 1.0.0 + */ +gchar * +garrow_datum_to_string(GArrowDatum *datum) +{ + const auto &arrow_datum = garrow_datum_get_raw(datum); + return g_strdup(arrow_datum.ToString().c_str()); +} + + +typedef struct GArrowArrayDatumPrivate_ { + GArrowArray *value; +} GArrowArrayDatumPrivate; + +enum { + PROP_VALUE = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowArrayDatum, + garrow_array_datum, + GARROW_TYPE_DATUM) + +#define GARROW_ARRAY_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowArrayDatumPrivate *>( \ + garrow_array_datum_get_instance_private( \ + GARROW_ARRAY_DATUM(obj))) + +static void +garrow_array_datum_dispose(GObject *object) +{ + auto priv = GARROW_ARRAY_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_array_datum_parent_class)->dispose(object); +} + +static void +garrow_array_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_datum_init(GArrowArrayDatum *object) +{ +} + +static void +garrow_array_datum_class_init(GArrowArrayDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_array_datum_dispose; + gobject_class->set_property = garrow_array_datum_set_property; + gobject_class->get_property = garrow_array_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The array held by this datum", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_array_datum_new: + * @value: A #GArrowArray. + * + * Returns: A newly created #GArrowArrayDatum. + * + * Since: 1.0.0 + */ +GArrowArrayDatum * +garrow_array_datum_new(GArrowArray *value) +{ + auto arrow_value = garrow_array_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_array_datum_new_raw(&arrow_datum, value); +} + + +typedef struct GArrowScalarDatumPrivate_ { + GArrowScalar *value; +} GArrowScalarDatumPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowScalarDatum, + garrow_scalar_datum, + GARROW_TYPE_DATUM) + +#define GARROW_SCALAR_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowScalarDatumPrivate *>( \ + garrow_scalar_datum_get_instance_private( \ + GARROW_SCALAR_DATUM(obj))) + +static void +garrow_scalar_datum_dispose(GObject *object) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_scalar_datum_parent_class)->dispose(object); +} + +static void +garrow_scalar_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = 
GARROW_SCALAR(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_datum_init(GArrowScalarDatum *object) +{ +} + +static void +garrow_scalar_datum_class_init(GArrowScalarDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_scalar_datum_dispose; + gobject_class->set_property = garrow_scalar_datum_set_property; + gobject_class->get_property = garrow_scalar_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The scalar held by this datum", + GARROW_TYPE_SCALAR, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_scalar_datum_new: + * @value: A #GArrowScalar. + * + * Returns: A newly created #GArrowScalarDatum. 
+ * + * Since: 5.0.0 + */ +GArrowScalarDatum * +garrow_scalar_datum_new(GArrowScalar *value) +{ + auto arrow_value = garrow_scalar_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_scalar_datum_new_raw(&arrow_datum, value); +} + + +typedef struct GArrowChunkedArrayDatumPrivate_ { + GArrowChunkedArray *value; +} GArrowChunkedArrayDatumPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowChunkedArrayDatum, + garrow_chunked_array_datum, + GARROW_TYPE_DATUM) + +#define GARROW_CHUNKED_ARRAY_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowChunkedArrayDatumPrivate *>( \ + garrow_chunked_array_datum_get_instance_private( \ + GARROW_CHUNKED_ARRAY_DATUM(obj))) + +static void +garrow_chunked_array_datum_dispose(GObject *object) +{ + auto priv = GARROW_CHUNKED_ARRAY_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_chunked_array_datum_parent_class)->dispose(object); /* chain up via this type's own parent class, not GArrowArrayDatum's */ +} + +static void +garrow_chunked_array_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CHUNKED_ARRAY_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_CHUNKED_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_chunked_array_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CHUNKED_ARRAY_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_chunked_array_datum_init(GArrowChunkedArrayDatum *object) +{ +} + +static void +garrow_chunked_array_datum_class_init(GArrowChunkedArrayDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose =
garrow_chunked_array_datum_dispose; + gobject_class->set_property = garrow_chunked_array_datum_set_property; + gobject_class->get_property = garrow_chunked_array_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The chunked array held by this datum", + GARROW_TYPE_CHUNKED_ARRAY, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_chunked_array_datum_new: + * @value: A #GArrowChunkedArray. + * + * Returns: A newly created #GArrowChunkedArrayDatum. + * + * Since: 1.0.0 + */ +GArrowChunkedArrayDatum * +garrow_chunked_array_datum_new(GArrowChunkedArray *value) +{ + auto arrow_value = garrow_chunked_array_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_chunked_array_datum_new_raw(&arrow_datum, value); +} + + +typedef struct GArrowRecordBatchDatumPrivate_ { + GArrowRecordBatch *value; +} GArrowRecordBatchDatumPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchDatum, + garrow_record_batch_datum, + GARROW_TYPE_DATUM) + +#define GARROW_RECORD_BATCH_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowRecordBatchDatumPrivate *>( \ + garrow_record_batch_datum_get_instance_private( \ + GARROW_RECORD_BATCH_DATUM(obj))) + +static void +garrow_record_batch_datum_dispose(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_record_batch_datum_parent_class)->dispose(object); /* chain up via this type's own parent class, not GArrowArrayDatum's */ +} + +static void +garrow_record_batch_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_RECORD_BATCH(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static
void +garrow_record_batch_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_datum_init(GArrowRecordBatchDatum *object) +{ +} + +static void +garrow_record_batch_datum_class_init(GArrowRecordBatchDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_record_batch_datum_dispose; + gobject_class->set_property = garrow_record_batch_datum_set_property; + gobject_class->get_property = garrow_record_batch_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The record batch held by this datum", + GARROW_TYPE_RECORD_BATCH, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); /* readable, construct-only "value" object property */ +} + +/** + * garrow_record_batch_datum_new: + * @value: A #GArrowRecordBatch. + * + * Returns: A newly created #GArrowRecordBatchDatum.
+ * + * Since: 1.0.0 + */ +GArrowRecordBatchDatum * +garrow_record_batch_datum_new(GArrowRecordBatch *value) +{ + auto arrow_value = garrow_record_batch_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_record_batch_datum_new_raw(&arrow_datum, value); +} + + +typedef struct GArrowTableDatumPrivate_ { + GArrowTable *value; +} GArrowTableDatumPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowTableDatum, + garrow_table_datum, + GARROW_TYPE_DATUM) + +#define GARROW_TABLE_DATUM_GET_PRIVATE(obj) \ + static_cast<GArrowTableDatumPrivate *>( \ + garrow_table_datum_get_instance_private( \ + GARROW_TABLE_DATUM(obj))) + +static void +garrow_table_datum_dispose(GObject *object) +{ + auto priv = GARROW_TABLE_DATUM_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_table_datum_parent_class)->dispose(object); +} + +static void +garrow_table_datum_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_TABLE_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_TABLE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_table_datum_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_TABLE_DATUM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + g_value_set_object(value, priv->value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_table_datum_init(GArrowTableDatum *object) +{ +} + +static void +garrow_table_datum_class_init(GArrowTableDatumClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_table_datum_dispose; + gobject_class->set_property = garrow_table_datum_set_property; + gobject_class->get_property = 
garrow_table_datum_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("value", + "Value", + "The table held by this datum", + GARROW_TYPE_TABLE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_table_datum_new: + * @value: A #GArrowTable. + * + * Returns: A newly created #GArrowTableDatum. + * + * Since: 1.0.0 + */ +GArrowTableDatum * +garrow_table_datum_new(GArrowTable *value) +{ + auto arrow_value = garrow_table_get_raw(value); + arrow::Datum arrow_datum(arrow_value); + return garrow_table_datum_new_raw(&arrow_datum, value); +} + + +G_END_DECLS + +arrow::Datum +garrow_datum_get_raw(GArrowDatum *datum) +{ + auto priv = GARROW_DATUM_GET_PRIVATE(datum); + return priv->datum; +} + +GArrowDatum * +garrow_datum_new_raw(arrow::Datum *arrow_datum) +{ + switch (arrow_datum->kind()) { + case arrow::Datum::SCALAR: + { + auto arrow_scalar = arrow_datum->scalar(); + auto scalar = garrow_scalar_new_raw(&arrow_scalar); + return GARROW_DATUM(garrow_scalar_datum_new_raw(arrow_datum, scalar)); + } + case arrow::Datum::ARRAY: + { + auto arrow_array = arrow_datum->make_array(); + auto array = garrow_array_new_raw(&arrow_array); + return GARROW_DATUM(garrow_array_datum_new_raw(arrow_datum, array)); + } + case arrow::Datum::CHUNKED_ARRAY: + { + auto arrow_chunked_array = arrow_datum->chunked_array(); + auto chunked_array = garrow_chunked_array_new_raw(&arrow_chunked_array); + auto chunked_array_datum = + garrow_chunked_array_datum_new_raw(arrow_datum, chunked_array); + return GARROW_DATUM(chunked_array_datum); + } + case arrow::Datum::RECORD_BATCH: + { + auto arrow_record_batch = arrow_datum->record_batch(); + auto record_batch = garrow_record_batch_new_raw(&arrow_record_batch); + auto record_batch_datum = + garrow_record_batch_datum_new_raw(arrow_datum, record_batch); + return GARROW_DATUM(record_batch_datum); + } + case arrow::Datum::TABLE: + { + auto 
arrow_table = arrow_datum->table(); + auto table = garrow_table_new_raw(&arrow_table); + return GARROW_DATUM(garrow_table_datum_new_raw(arrow_datum, table)); + } + default: + // TODO + return NULL; + } +} + +GArrowScalarDatum * +garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum, + GArrowScalar *value) +{ + return GARROW_SCALAR_DATUM(g_object_new(GARROW_TYPE_SCALAR_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} + +GArrowArrayDatum * +garrow_array_datum_new_raw(arrow::Datum *arrow_datum, + GArrowArray *value) +{ + return GARROW_ARRAY_DATUM(g_object_new(GARROW_TYPE_ARRAY_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} + +GArrowChunkedArrayDatum * +garrow_chunked_array_datum_new_raw(arrow::Datum *arrow_datum, + GArrowChunkedArray *value) +{ + return GARROW_CHUNKED_ARRAY_DATUM(g_object_new(GARROW_TYPE_CHUNKED_ARRAY_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} + +GArrowRecordBatchDatum * +garrow_record_batch_datum_new_raw(arrow::Datum *arrow_datum, + GArrowRecordBatch *value) +{ + return GARROW_RECORD_BATCH_DATUM(g_object_new(GARROW_TYPE_RECORD_BATCH_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} + +GArrowTableDatum * +garrow_table_datum_new_raw(arrow::Datum *arrow_datum, + GArrowTable *value) +{ + return GARROW_TABLE_DATUM(g_object_new(GARROW_TYPE_TABLE_DATUM, + "datum", arrow_datum, + "value", value, + NULL)); +} diff --git a/src/arrow/c_glib/arrow-glib/datum.h b/src/arrow/c_glib/arrow-glib/datum.h new file mode 100644 index 000000000..bc7dda369 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/datum.h @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.h> +#include <arrow-glib/chunked-array.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/scalar.h> +#include <arrow-glib/table.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_DATUM (garrow_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDatum, + garrow_datum, + GARROW, + DATUM, + GObject) +struct _GArrowDatumClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +gboolean garrow_datum_is_array(GArrowDatum *datum); +GARROW_AVAILABLE_IN_1_0 +gboolean garrow_datum_is_array_like(GArrowDatum *datum); +GARROW_AVAILABLE_IN_5_0 +gboolean garrow_datum_is_scalar(GArrowDatum *datum); +GARROW_AVAILABLE_IN_5_0 +gboolean garrow_datum_is_value(GArrowDatum *datum); +/* +GARROW_AVAILABLE_IN_5_0 +gboolean garrow_datum_is_collection(GArrowDatum *datum); +*/ +GARROW_AVAILABLE_IN_1_0 +gboolean garrow_datum_equal(GArrowDatum *datum, + GArrowDatum *other_datum); +GARROW_AVAILABLE_IN_1_0 +gchar *garrow_datum_to_string(GArrowDatum *datum); + +/* GARROW_TYPE_NONE_DATUM */ + +#define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalarDatum, + garrow_scalar_datum, + GARROW, + SCALAR_DATUM, + GArrowDatum) +struct _GArrowScalarDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowScalarDatum *garrow_scalar_datum_new(GArrowScalar *value); + +#define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowArrayDatum, + garrow_array_datum, + GARROW, + ARRAY_DATUM, + GArrowDatum) +struct 
_GArrowArrayDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowArrayDatum *garrow_array_datum_new(GArrowArray *value); + +#define GARROW_TYPE_CHUNKED_ARRAY_DATUM (garrow_chunked_array_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowChunkedArrayDatum, + garrow_chunked_array_datum, + GARROW, + CHUNKED_ARRAY_DATUM, + GArrowDatum) +struct _GArrowChunkedArrayDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArrayDatum * +garrow_chunked_array_datum_new(GArrowChunkedArray *value); + +#define GARROW_TYPE_RECORD_BATCH_DATUM (garrow_record_batch_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchDatum, + garrow_record_batch_datum, + GARROW, + RECORD_BATCH_DATUM, + GArrowDatum) +struct _GArrowRecordBatchDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowRecordBatchDatum * +garrow_record_batch_datum_new(GArrowRecordBatch *value); + +#define GARROW_TYPE_TABLE_DATUM (garrow_table_datum_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTableDatum, + garrow_table_datum, + GARROW, + TABLE_DATUM, + GArrowDatum) +struct _GArrowTableDatumClass +{ + GArrowDatumClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowTableDatum *garrow_table_datum_new(GArrowTable *value); + +/* GARROW_TYPE_COLLECTION_DATUM */ + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/datum.hpp b/src/arrow/c_glib/arrow-glib/datum.hpp new file mode 100644 index 000000000..d1acfc58c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/datum.hpp @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/datum.h> + +arrow::Datum +garrow_datum_get_raw(GArrowDatum *datum); +GArrowDatum * +garrow_datum_new_raw(arrow::Datum *arrow_datum); + +GArrowScalarDatum * +garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum, + GArrowScalar *value); +GArrowArrayDatum * +garrow_array_datum_new_raw(arrow::Datum *arrow_datum, + GArrowArray *value); +GArrowChunkedArrayDatum * +garrow_chunked_array_datum_new_raw(arrow::Datum *arrow_datum, + GArrowChunkedArray *value); +GArrowRecordBatchDatum * +garrow_record_batch_datum_new_raw(arrow::Datum *arrow_datum, + GArrowRecordBatch *value); +GArrowTableDatum * +garrow_table_datum_new_raw(arrow::Datum *arrow_datum, + GArrowTable *value); diff --git a/src/arrow/c_glib/arrow-glib/decimal.cpp b/src/arrow/c_glib/arrow-glib/decimal.cpp new file mode 100644 index 000000000..497d76fcf --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/decimal.cpp @@ -0,0 +1,1115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/decimal.hpp> +#include <arrow-glib/error.hpp> + +template <typename Decimal> +struct DecimalConverter { +}; + +template <> +struct DecimalConverter<arrow::Decimal128> { + using ArrowType = arrow::Decimal128; + using GArrowType = GArrowDecimal128; + + GArrowType * + new_raw(std::shared_ptr<ArrowType> *arrow_decimal128) + { + return garrow_decimal128_new_raw(arrow_decimal128); + } + + std::shared_ptr<ArrowType> + get_raw(GArrowType *decimal128) + { + return garrow_decimal128_get_raw(decimal128); + } +}; + +template <> +struct DecimalConverter<arrow::Decimal256> { + using ArrowType = arrow::Decimal256; + using GArrowType = GArrowDecimal256; + + GArrowType * + new_raw(std::shared_ptr<ArrowType> *arrow_decimal256) { + return garrow_decimal256_new_raw(arrow_decimal256); + } + + std::shared_ptr<ArrowType> + get_raw(GArrowType *decimal256) { + return garrow_decimal256_get_raw(decimal256); + } +}; + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_new_string(const gchar *data) +{ + auto arrow_decimal = std::make_shared<Decimal>(data); + DecimalConverter<Decimal> converter; + return converter.new_raw(&arrow_decimal); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_new_integer(const gint64 data) +{ + auto arrow_decimal = std::make_shared<Decimal>(data); + DecimalConverter<Decimal> converter; + return converter.new_raw(&arrow_decimal); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * 
+garrow_decimal_copy(typename DecimalConverter<Decimal>::GArrowType *decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + auto arrow_copied_decimal = std::make_shared<Decimal>(*arrow_decimal); + return converter.new_raw(&arrow_copied_decimal); +} + +template <typename Decimal> +gboolean +garrow_decimal_equal(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal == *arrow_other_decimal; +} + +template <typename Decimal> +gboolean +garrow_decimal_not_equal(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal != *arrow_other_decimal; +} + +template <typename Decimal> +gboolean +garrow_decimal_less_than(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal < *arrow_other_decimal; +} + +template <typename Decimal> +gboolean +garrow_decimal_less_than_or_equal(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal <= *arrow_other_decimal; +} + +template <typename Decimal> +gboolean 
+garrow_decimal_greater_than(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal > *arrow_other_decimal; +} + +template <typename Decimal> +gboolean +garrow_decimal_greater_than_or_equal(typename DecimalConverter<Decimal>::GArrowType *decimal, + typename DecimalConverter<Decimal>::GArrowType *other_decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + const auto arrow_other_decimal = converter.get_raw(other_decimal); + return *arrow_decimal >= *arrow_other_decimal; +} + +template <typename Decimal> +gchar * +garrow_decimal_to_string_scale(typename DecimalConverter<Decimal>::GArrowType *decimal, + gint32 scale) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + return g_strdup(arrow_decimal->ToString(scale).c_str()); +} + +template <typename Decimal> +gchar * +garrow_decimal_to_string(typename DecimalConverter<Decimal>::GArrowType *decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + return g_strdup(arrow_decimal->ToIntegerString().c_str()); +} + +template <typename Decimal> +GBytes * +garrow_decimal_to_bytes(typename DecimalConverter<Decimal>::GArrowType *decimal) +{ + DecimalConverter<Decimal> converter; + const auto arrow_decimal = converter.get_raw(decimal); + uint8_t data[DecimalConverter<Decimal>::ArrowType::kBitWidth / 8]; + arrow_decimal->ToBytes(data); + return g_bytes_new(data, sizeof(data)); +} + +template <typename Decimal> +void +garrow_decimal_abs(typename DecimalConverter<Decimal>::GArrowType *decimal) +{ + DecimalConverter<Decimal> converter; + auto arrow_decimal = converter.get_raw(decimal); + arrow_decimal->Abs(); +} + +template 
<typename Decimal> +void +garrow_decimal_negate(typename DecimalConverter<Decimal>::GArrowType *decimal) +{ + DecimalConverter<Decimal> converter; + auto arrow_decimal = converter.get_raw(decimal); + arrow_decimal->Negate(); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_plus(typename DecimalConverter<Decimal>::GArrowType *left, + typename DecimalConverter<Decimal>::GArrowType *right) +{ + DecimalConverter<Decimal> converter; + auto arrow_left = converter.get_raw(left); + auto arrow_right = converter.get_raw(right); + auto arrow_decimal = std::make_shared<Decimal>(*arrow_left + *arrow_right); + return converter.new_raw(&arrow_decimal); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_minus(typename DecimalConverter<Decimal>::GArrowType *left, + typename DecimalConverter<Decimal>::GArrowType *right) +{ + DecimalConverter<Decimal> converter; + auto arrow_left = converter.get_raw(left); + auto arrow_right = converter.get_raw(right); + auto arrow_decimal = std::make_shared<Decimal>(*arrow_left - *arrow_right); + return converter.new_raw(&arrow_decimal); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_multiply(typename DecimalConverter<Decimal>::GArrowType *left, + typename DecimalConverter<Decimal>::GArrowType *right) +{ + DecimalConverter<Decimal> converter; + auto arrow_left = converter.get_raw(left); + auto arrow_right = converter.get_raw(right); + auto arrow_decimal = std::make_shared<Decimal>(*arrow_left * *arrow_right); + return converter.new_raw(&arrow_decimal); +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_divide(typename DecimalConverter<Decimal>::GArrowType *left, + typename DecimalConverter<Decimal>::GArrowType *right, + typename DecimalConverter<Decimal>::GArrowType **remainder, + GError **error, + const gchar *tag) +{ + DecimalConverter<Decimal> 
converter; + auto arrow_left = converter.get_raw(left); + auto arrow_right = converter.get_raw(right); + auto arrow_result = arrow_left->Divide(*arrow_right); + if (garrow::check(error, arrow_result, tag)) { + Decimal arrow_quotient_raw; + Decimal arrow_remainder_raw; + std::tie(arrow_quotient_raw, arrow_remainder_raw) = *arrow_result; + if (remainder) { + auto arrow_remainder = std::make_shared<Decimal>(arrow_remainder_raw); + *remainder = converter.new_raw(&arrow_remainder); + } + auto arrow_quotient = std::make_shared<Decimal>(arrow_quotient_raw); + return converter.new_raw(&arrow_quotient); + } else { + if (remainder) { + *remainder = NULL; + } + return NULL; + } +} + +template <typename Decimal> +typename DecimalConverter<Decimal>::GArrowType * +garrow_decimal_rescale(typename DecimalConverter<Decimal>::GArrowType *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error, + const gchar *tag) +{ + DecimalConverter<Decimal> converter; + auto arrow_decimal = converter.get_raw(decimal); + auto arrow_result = arrow_decimal->Rescale(original_scale, new_scale); + if (garrow::check(error, arrow_result, tag)) { + auto arrow_rescaled_decimal = std::make_shared<Decimal>(*arrow_result); + return converter.new_raw(&arrow_rescaled_decimal); + } else { + return NULL; + } +} + + +G_BEGIN_DECLS + +/** + * SECTION: decimal + * @section_id: decimal + * @title: 128-bit and 256-bit decimal classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowDecimal128 is a 128-bit decimal class. + * + * #GArrowDecimal256 is a 256-bit decimal class. 
+ * + * Since: 0.10.0 + */ + +typedef struct GArrowDecimal128Private_ { + std::shared_ptr<arrow::Decimal128> decimal128; +} GArrowDecimal128Private; + +enum { + PROP_DECIMAL128 = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal128, + garrow_decimal128, + G_TYPE_OBJECT) + +#define GARROW_DECIMAL128_GET_PRIVATE(obj) \ + static_cast<GArrowDecimal128Private *>( \ + garrow_decimal128_get_instance_private( \ + GARROW_DECIMAL128(obj))) + +static void +garrow_decimal128_finalize(GObject *object) +{ + auto priv = GARROW_DECIMAL128_GET_PRIVATE(object); + + priv->decimal128.~shared_ptr(); + + G_OBJECT_CLASS(garrow_decimal128_parent_class)->finalize(object); +} + +static void +garrow_decimal128_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL128_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DECIMAL128: + priv->decimal128 = + *static_cast<std::shared_ptr<arrow::Decimal128> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal128_init(GArrowDecimal128 *object) +{ + auto priv = GARROW_DECIMAL128_GET_PRIVATE(object); + new(&priv->decimal128) std::shared_ptr<arrow::Decimal128>; +} + +static void +garrow_decimal128_class_init(GArrowDecimal128Class *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_decimal128_finalize; + gobject_class->set_property = garrow_decimal128_set_property; + + spec = g_param_spec_pointer("decimal128", + "Decimal128", + "The raw std::shared_ptr<arrow::Decimal128> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DECIMAL128, spec); /* write-only, construct-only raw pointer property */ +} + +/** + * garrow_decimal128_new_string: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal128.
+ * + * Since: 0.10.0 + */ +GArrowDecimal128 * +garrow_decimal128_new_string(const gchar *data) +{ + return garrow_decimal_new_string<arrow::Decimal128>(data); +} + +/** + * garrow_decimal128_new_integer: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal128. + * + * Since: 0.10.0 + */ +GArrowDecimal128 * +garrow_decimal128_new_integer(const gint64 data) +{ + return garrow_decimal_new_integer<arrow::Decimal128>(data); +} + +/** + * garrow_decimal128_copy: + * @decimal: The decimal to be copied. + * + * Returns: (transfer full): A copied #GArrowDecimal128. + * + * Since: 3.0.0 + */ +GArrowDecimal128 * +garrow_decimal128_copy(GArrowDecimal128 *decimal) +{ + return garrow_decimal_copy<arrow::Decimal128>(decimal); +} + +/** + * garrow_decimal128_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_equal<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_not_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_not_equal<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_less_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_less_than<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_less_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_less_than_or_equal<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_greater_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_greater_than<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_greater_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + return garrow_decimal_greater_than_or_equal<arrow::Decimal128>(decimal, other_decimal); +} + +/** + * garrow_decimal128_to_string_scale: + * @decimal: A #GArrowDecimal128. + * @scale: The scale of the decimal. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 0.10.0 + */ +gchar * +garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale) +{ + return garrow_decimal_to_string_scale<arrow::Decimal128>(decimal, scale); +} + +/** + * garrow_decimal128_to_string: + * @decimal: A #GArrowDecimal128. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.10.0 + */ +gchar * +garrow_decimal128_to_string(GArrowDecimal128 *decimal) +{ + return garrow_decimal_to_string<arrow::Decimal128>(decimal); +} + +/** + * garrow_decimal128_to_bytes: + * @decimal: A #GArrowDecimal128. + * + * Returns: (transfer full): The binary representation of the decimal. + * + * Since: 3.0.0 + */ +GBytes * +garrow_decimal128_to_bytes(GArrowDecimal128 *decimal) +{ + return garrow_decimal_to_bytes<arrow::Decimal128>(decimal); +} + +/** + * garrow_decimal128_abs: + * @decimal: A #GArrowDecimal128. + * + * Computes the absolute value of the @decimal destructively. + * + * Since: 0.10.0 + */ +void +garrow_decimal128_abs(GArrowDecimal128 *decimal) +{ + garrow_decimal_abs<arrow::Decimal128>(decimal); +} + +/** + * garrow_decimal128_negate: + * @decimal: A #GArrowDecimal128. + * + * Negate the current value of the @decimal destructively. + * + * Since: 0.10.0 + */ +void +garrow_decimal128_negate(GArrowDecimal128 *decimal) +{ + garrow_decimal_negate<arrow::Decimal128>(decimal); +} + +/** + * garrow_decimal128_to_integer: + * @decimal: A #GArrowDecimal128. + * + * Returns: The 64-bit integer representation of the decimal. + * + * Since: 0.10.0 + */ +gint64 +garrow_decimal128_to_integer(GArrowDecimal128 *decimal) +{ + auto arrow_decimal = garrow_decimal128_get_raw(decimal); + return static_cast<int64_t>(*arrow_decimal); +} + +/** + * garrow_decimal128_plus: + * @left: A #GArrowDecimal128. + * @right: A #GArrowDecimal128. + * + * Returns: (transfer full): The added value of these decimals. 
+ * + * Since: 0.11.0 + */ +GArrowDecimal128 * +garrow_decimal128_plus(GArrowDecimal128 *left, + GArrowDecimal128 *right) +{ + return garrow_decimal_plus<arrow::Decimal128>(left, right); +} + +/** + * garrow_decimal128_minus: + * @left: A #GArrowDecimal128. + * @right: A #GArrowDecimal128. + * + * Returns: (transfer full): The subtracted value of these decimals. + * + * Since: 0.11.0 + */ +GArrowDecimal128 * +garrow_decimal128_minus(GArrowDecimal128 *left, + GArrowDecimal128 *right) +{ + return garrow_decimal_minus<arrow::Decimal128>(left, right); +} + +/** + * garrow_decimal128_multiply: + * @left: A #GArrowDecimal128. + * @right: A #GArrowDecimal128. + * + * Returns: (transfer full): The multiplied value of these decimals. + * + * Since: 0.11.0 + */ +GArrowDecimal128 * +garrow_decimal128_multiply(GArrowDecimal128 *left, + GArrowDecimal128 *right) +{ + return garrow_decimal_multiply<arrow::Decimal128>(left, right); +} + +/** + * garrow_decimal128_divide: + * @left: A #GArrowDecimal128. + * @right: A #GArrowDecimal128. + * @remainder: (out) (nullable): A return location for the remainder + * value of these decimals. The returned #GArrowDecimal128 should be + * unreferred with g_object_unref() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The divided value of + * these decimals or %NULL on error. + * + * Since: 0.11.0 + */ +GArrowDecimal128 * +garrow_decimal128_divide(GArrowDecimal128 *left, + GArrowDecimal128 *right, + GArrowDecimal128 **remainder, + GError **error) +{ + return garrow_decimal_divide<arrow::Decimal128>(left, + right, + remainder, + error, + "[decimal128][divide]"); +} + +/** + * garrow_decimal128_rescale: + * @decimal: A #GArrowDecimal128. + * @original_scale: A scale to be converted from. + * @new_scale: A scale to be converted to. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable) (transfer full): The rescaled decimal or %NULL on error. + * + * Since: 0.15.0 + */ +GArrowDecimal128 * +garrow_decimal128_rescale(GArrowDecimal128 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error) +{ + return garrow_decimal_rescale<arrow::Decimal128>(decimal, + original_scale, + new_scale, + error, + "[decimal128][rescale]"); +} + + +typedef struct GArrowDecimal256Private_ { + std::shared_ptr<arrow::Decimal256> decimal256; +} GArrowDecimal256Private; + +enum { + PROP_DECIMAL256 = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal256, + garrow_decimal256, + G_TYPE_OBJECT) + +#define GARROW_DECIMAL256_GET_PRIVATE(obj) \ + static_cast<GArrowDecimal256Private *>( \ + garrow_decimal256_get_instance_private( \ + GARROW_DECIMAL256(obj))) + +static void +garrow_decimal256_finalize(GObject *object) +{ + auto priv = GARROW_DECIMAL256_GET_PRIVATE(object); + + priv->decimal256.~shared_ptr(); + + G_OBJECT_CLASS(garrow_decimal256_parent_class)->finalize(object); +} + +static void +garrow_decimal256_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL256_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DECIMAL256: + priv->decimal256 = + *static_cast<std::shared_ptr<arrow::Decimal256> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal256_init(GArrowDecimal256 *object) +{ + auto priv = GARROW_DECIMAL256_GET_PRIVATE(object); + new(&priv->decimal256) std::shared_ptr<arrow::Decimal256>; +} + +static void +garrow_decimal256_class_init(GArrowDecimal256Class *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_decimal256_finalize; + gobject_class->set_property = garrow_decimal256_set_property; + + spec = g_param_spec_pointer("decimal256", + "Decimal256", + "The raw 
std::shared<arrow::Decimal256> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DECIMAL256, spec); +} + +/** + * garrow_decimal256_new_string: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal256. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_new_string(const gchar *data) +{ + return garrow_decimal_new_string<arrow::Decimal256>(data); +} + +/** + * garrow_decimal256_new_integer: + * @data: The data of the decimal. + * + * Returns: A newly created #GArrowDecimal256. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_new_integer(const gint64 data) +{ + return garrow_decimal_new_integer<arrow::Decimal256>(data); +} + +/** + * garrow_decimal256_copy: + * @decimal: The decimal to be copied. + * + * Returns: (transfer full): A copied #GArrowDecimal256. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_copy(GArrowDecimal256 *decimal) +{ + return garrow_decimal_copy<arrow::Decimal256>(decimal); +} + +/** + * garrow_decimal256_equal: + * @decimal: A #GArrowDecimal256. + * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_equal<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_not_equal: + * @decimal: A #GArrowDecimal256. + * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_not_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_not_equal<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_less_than: + * @decimal: A #GArrowDecimal256. 
+ * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_less_than(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_less_than<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_less_than_or_equal: + * @decimal: A #GArrowDecimal256. + * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_less_than_or_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_less_than_or_equal<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_greater_than: + * @decimal: A #GArrowDecimal256. + * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_greater_than(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_greater_than<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_greater_than_or_equal: + * @decimal: A #GArrowDecimal256. + * @other_decimal: A #GArrowDecimal256 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_decimal256_greater_than_or_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal) +{ + return garrow_decimal_greater_than_or_equal<arrow::Decimal256>(decimal, other_decimal); +} + +/** + * garrow_decimal256_to_string_scale: + * @decimal: A #GArrowDecimal256. + * @scale: The scale of the decimal. + * + * Returns: The string representation of the decimal. 
+ * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +garrow_decimal256_to_string_scale(GArrowDecimal256 *decimal, gint32 scale) +{ + return garrow_decimal_to_string_scale<arrow::Decimal256>(decimal, scale); +} + +/** + * garrow_decimal256_to_string: + * @decimal: A #GArrowDecimal256. + * + * Returns: The string representation of the decimal. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +garrow_decimal256_to_string(GArrowDecimal256 *decimal) +{ + return garrow_decimal_to_string<arrow::Decimal256>(decimal); +} + +/** + * garrow_decimal256_to_bytes: + * @decimal: A #GArrowDecimal256. + * + * Returns: (transfer full): The binary representation of the decimal. + * + * Since: 3.0.0 + */ +GBytes * +garrow_decimal256_to_bytes(GArrowDecimal256 *decimal) +{ + return garrow_decimal_to_bytes<arrow::Decimal256>(decimal); +} + +/** + * garrow_decimal256_abs: + * @decimal: A #GArrowDecimal256. + * + * Computes the absolute value of the @decimal destructively. + * + * Since: 3.0.0 + */ +void +garrow_decimal256_abs(GArrowDecimal256 *decimal) +{ + garrow_decimal_abs<arrow::Decimal256>(decimal); +} + +/** + * garrow_decimal256_negate: + * @decimal: A #GArrowDecimal256. + * + * Negate the current value of the @decimal destructively. + * + * Since: 3.0.0 + */ +void +garrow_decimal256_negate(GArrowDecimal256 *decimal) +{ + garrow_decimal_negate<arrow::Decimal256>(decimal); +} + +/** + * garrow_decimal256_plus: + * @left: A #GArrowDecimal256. + * @right: A #GArrowDecimal256. + * + * Returns: (transfer full): The added value of these decimals. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_plus(GArrowDecimal256 *left, + GArrowDecimal256 *right) +{ + return garrow_decimal_plus<arrow::Decimal256>(left, right); +} + +/** + * garrow_decimal256_multiply: + * @left: A #GArrowDecimal256. + * @right: A #GArrowDecimal256. 
+ * + * Returns: (transfer full): The multiplied value of these decimals. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_multiply(GArrowDecimal256 *left, + GArrowDecimal256 *right) +{ + return garrow_decimal_multiply<arrow::Decimal256>(left, right); +} + +/** + * garrow_decimal256_divide: + * @left: A #GArrowDecimal256. + * @right: A #GArrowDecimal256. + * @remainder: (out) (nullable): A return location for the remainder + * value of these decimals. The returned #GArrowDecimal256 should be + * unreferred with g_object_unref() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The divided value of + * these decimals or %NULL on error. + * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_divide(GArrowDecimal256 *left, + GArrowDecimal256 *right, + GArrowDecimal256 **remainder, + GError **error) +{ + return garrow_decimal_divide<arrow::Decimal256>(left, + right, + remainder, + error, + "[decimal256][divide]"); +} + +/** + * garrow_decimal256_rescale: + * @decimal: A #GArrowDecimal256. + * @original_scale: A scale to be converted from. + * @new_scale: A scale to be converted to. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The rescaled decimal or %NULL on error. 
+ * + * Since: 3.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_rescale(GArrowDecimal256 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error) +{ + return garrow_decimal_rescale<arrow::Decimal256>(decimal, + original_scale, + new_scale, + error, + "[decimal256][rescale]"); +} + + +G_END_DECLS + +GArrowDecimal128 * +garrow_decimal128_new_raw(std::shared_ptr<arrow::Decimal128> *arrow_decimal128) +{ + auto decimal128 = g_object_new(garrow_decimal128_get_type(), + "decimal128", arrow_decimal128, + NULL); + return GARROW_DECIMAL128(decimal128); +} + +std::shared_ptr<arrow::Decimal128> +garrow_decimal128_get_raw(GArrowDecimal128 *decimal128) +{ + auto priv = GARROW_DECIMAL128_GET_PRIVATE(decimal128); + return priv->decimal128; +} + +GArrowDecimal256 * +garrow_decimal256_new_raw(std::shared_ptr<arrow::Decimal256> *arrow_decimal256) +{ + auto decimal256 = g_object_new(garrow_decimal256_get_type(), + "decimal256", arrow_decimal256, + NULL); + return GARROW_DECIMAL256(decimal256); +} + +std::shared_ptr<arrow::Decimal256> +garrow_decimal256_get_raw(GArrowDecimal256 *decimal256) +{ + auto priv = GARROW_DECIMAL256_GET_PRIVATE(decimal256); + return priv->decimal256; +} diff --git a/src/arrow/c_glib/arrow-glib/decimal.h b/src/arrow/c_glib/arrow-glib/decimal.h new file mode 100644 index 000000000..61f849234 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/decimal.h @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +/* Disabled because it conflicts with GARROW_TYPE_DECIMAL128 in GArrowType. */ +/* #define GARROW_TYPE_DECIMAL128 (garrow_decimal128_get_type()) */ +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128, + garrow_decimal128, + GARROW, + DECIMAL128, + GObject) + +struct _GArrowDecimal128Class +{ + GObjectClass parent_class; +}; + +GArrowDecimal128 *garrow_decimal128_new_string(const gchar *data); +GArrowDecimal128 *garrow_decimal128_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal128 *garrow_decimal128_copy(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +gchar *garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, + gint32 scale); +gchar 
*garrow_decimal128_to_string(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_3_0 +GBytes *garrow_decimal128_to_bytes(GArrowDecimal128 *decimal); +void garrow_decimal128_abs(GArrowDecimal128 *decimal); +void garrow_decimal128_negate(GArrowDecimal128 *decimal); +gint64 garrow_decimal128_to_integer(GArrowDecimal128 *decimal); +GArrowDecimal128 *garrow_decimal128_plus(GArrowDecimal128 *left, + GArrowDecimal128 *right); +GArrowDecimal128 *garrow_decimal128_minus(GArrowDecimal128 *left, + GArrowDecimal128 *right); +GArrowDecimal128 *garrow_decimal128_multiply(GArrowDecimal128 *left, + GArrowDecimal128 *right); +GArrowDecimal128 *garrow_decimal128_divide(GArrowDecimal128 *left, + GArrowDecimal128 *right, + GArrowDecimal128 **remainder, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowDecimal128 * +garrow_decimal128_rescale(GArrowDecimal128 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error); + + +/* Disabled because it conflicts with GARROW_TYPE_DECIMAL256 in GArrowType. 
*/ +/* #define GARROW_TYPE_DECIMAL256 (garrow_decimal256_get_type()) */ +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256, + garrow_decimal256, + GARROW, + DECIMAL256, + GObject) + +struct _GArrowDecimal256Class +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_new_string(const gchar *data); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_copy(GArrowDecimal256 *decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_not_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_less_than(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_less_than_or_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_greater_than(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gboolean garrow_decimal256_greater_than_or_equal(GArrowDecimal256 *decimal, + GArrowDecimal256 *other_decimal); +GARROW_AVAILABLE_IN_3_0 +gchar *garrow_decimal256_to_string_scale(GArrowDecimal256 *decimal, + gint32 scale); +GARROW_AVAILABLE_IN_3_0 +gchar *garrow_decimal256_to_string(GArrowDecimal256 *decimal); +GARROW_AVAILABLE_IN_3_0 +GBytes *garrow_decimal256_to_bytes(GArrowDecimal256 *decimal); +GARROW_AVAILABLE_IN_3_0 +void garrow_decimal256_abs(GArrowDecimal256 *decimal); +GARROW_AVAILABLE_IN_3_0 +void garrow_decimal256_negate(GArrowDecimal256 *decimal); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_plus(GArrowDecimal256 *left, + GArrowDecimal256 *right); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_multiply(GArrowDecimal256 *left, + GArrowDecimal256 *right); 
+GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 *garrow_decimal256_divide(GArrowDecimal256 *left, + GArrowDecimal256 *right, + GArrowDecimal256 **remainder, + GError **error); +GARROW_AVAILABLE_IN_3_0 +GArrowDecimal256 * +garrow_decimal256_rescale(GArrowDecimal256 *decimal, + gint32 original_scale, + gint32 new_scale, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/decimal.hpp b/src/arrow/c_glib/arrow-glib/decimal.hpp new file mode 100644 index 000000000..054a91b83 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/decimal.hpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <memory> + +#include <arrow/util/decimal.h> + +#include <arrow-glib/decimal.h> + +GArrowDecimal128 * +garrow_decimal128_new_raw(std::shared_ptr<arrow::Decimal128> *arrow_decimal128); +std::shared_ptr<arrow::Decimal128> +garrow_decimal128_get_raw(GArrowDecimal128 *decimal); + +GArrowDecimal256 * +garrow_decimal256_new_raw(std::shared_ptr<arrow::Decimal256> *arrow_decimal256); +std::shared_ptr<arrow::Decimal256> +garrow_decimal256_get_raw(GArrowDecimal256 *decimal); diff --git a/src/arrow/c_glib/arrow-glib/enums.c.template b/src/arrow/c_glib/arrow-glib/enums.c.template new file mode 100644 index 000000000..6806ed194 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/enums.c.template @@ -0,0 +1,52 @@ +/*** BEGIN file-header ***/ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/arrow-glib.h> +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType +@enum_name@_get_type(void) +{ + static GType etype = 0; + if (G_UNLIKELY(etype == 0)) { + static const G@Type@Value values[] = { +/*** END value-header ***/ + +/*** BEGIN value-production ***/ + {@VALUENAME@, "@VALUENAME@", "@valuenick@"}, +/*** END value-production ***/ + +/*** BEGIN value-tail ***/ + {0, NULL, NULL} + }; + etype = g_@type@_register_static(g_intern_static_string("@EnumName@"), values); + } + return etype; +} +/*** END value-tail ***/ + +/*** BEGIN file-tail ***/ +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/arrow-glib/enums.h.template b/src/arrow/c_glib/arrow-glib/enums.h.template new file mode 100644 index 000000000..3509ed2e9 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/enums.h.template @@ -0,0 +1,41 @@ +/*** BEGIN file-header ***/ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/error.h> + +G_BEGIN_DECLS +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType @enum_name@_get_type(void) G_GNUC_CONST; +#define @ENUMPREFIX@_TYPE_@ENUMSHORT@ (@enum_name@_get_type()) +/*** END value-header ***/ + +/*** BEGIN file-tail ***/ + +G_END_DECLS +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/arrow-glib/error.cpp b/src/arrow/c_glib/arrow-glib/error.cpp new file mode 100644 index 000000000..ac61ddc49 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/error.cpp @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/error.hpp> + +#include <iostream> +#include <sstream> + +G_BEGIN_DECLS + +/** + * SECTION: error + * @title: GArrowError + * @short_description: Error code mapping between Arrow and arrow-glib + * + * #GArrowError provides error codes corresponding to `arrow::Status` + * values. 
+ */ + +G_DEFINE_QUARK(garrow-error-quark, garrow_error) + +G_END_DECLS + +gboolean +garrow_error_check(GError **error, + const arrow::Status &status, + const char *context) +{ + return garrow::check(error, status, context); +} + +GArrowError +garrow_error_from_status(const arrow::Status &status) +{ + switch (status.code()) { + case arrow::StatusCode::OK: + return GARROW_ERROR_UNKNOWN; + case arrow::StatusCode::OutOfMemory: + return GARROW_ERROR_OUT_OF_MEMORY; + case arrow::StatusCode::KeyError: + return GARROW_ERROR_KEY; + case arrow::StatusCode::TypeError: + return GARROW_ERROR_TYPE; + case arrow::StatusCode::Invalid: + return GARROW_ERROR_INVALID; + case arrow::StatusCode::IOError: + return GARROW_ERROR_IO; + case arrow::StatusCode::CapacityError: + return GARROW_ERROR_CAPACITY; + case arrow::StatusCode::IndexError: + return GARROW_ERROR_INDEX; + case arrow::StatusCode::UnknownError: + return GARROW_ERROR_UNKNOWN; + case arrow::StatusCode::NotImplemented: + return GARROW_ERROR_NOT_IMPLEMENTED; + case arrow::StatusCode::SerializationError: + return GARROW_ERROR_SERIALIZATION; + case arrow::StatusCode::CodeGenError: + return GARROW_ERROR_CODE_GENERATION; + case arrow::StatusCode::ExpressionValidationError: + return GARROW_ERROR_EXPRESSION_VALIDATION; + case arrow::StatusCode::ExecutionError: + return GARROW_ERROR_EXECUTION; + case arrow::StatusCode::AlreadyExists: + return GARROW_ERROR_ALREADY_EXISTS; + default: + return GARROW_ERROR_UNKNOWN; + } +} + +arrow::StatusCode +garrow_error_to_status_code(GError *error, + arrow::StatusCode default_code) +{ + if (error->domain != GARROW_ERROR) { + return default_code; + } + + switch (error->code) { + case GARROW_ERROR_OUT_OF_MEMORY: + return arrow::StatusCode::OutOfMemory; + case GARROW_ERROR_KEY: + return arrow::StatusCode::KeyError; + case GARROW_ERROR_TYPE: + return arrow::StatusCode::TypeError; + case GARROW_ERROR_INVALID: + return arrow::StatusCode::Invalid; + case GARROW_ERROR_IO: + return 
arrow::StatusCode::IOError; + case GARROW_ERROR_CAPACITY: + return arrow::StatusCode::CapacityError; + case GARROW_ERROR_INDEX: + return arrow::StatusCode::IndexError; + case GARROW_ERROR_UNKNOWN: + return arrow::StatusCode::UnknownError; + case GARROW_ERROR_NOT_IMPLEMENTED: + return arrow::StatusCode::NotImplemented; + case GARROW_ERROR_SERIALIZATION: + return arrow::StatusCode::SerializationError; + case GARROW_ERROR_CODE_GENERATION: + return arrow::StatusCode::CodeGenError; + case GARROW_ERROR_EXPRESSION_VALIDATION: + return arrow::StatusCode::ExpressionValidationError; + case GARROW_ERROR_EXECUTION: + return arrow::StatusCode::ExecutionError; + case GARROW_ERROR_ALREADY_EXISTS: + return arrow::StatusCode::AlreadyExists; + default: + return default_code; + } +} + +arrow::Status +garrow_error_to_status(GError *error, + arrow::StatusCode default_code, + const char *context) +{ + std::stringstream message; + message << context << ": " << g_quark_to_string(error->domain); + message << "(" << error->code << "): "; + message << error->message; + auto code = garrow_error_to_status_code(error, default_code); + g_error_free(error); + return arrow::Status(code, message.str()); +} + +namespace garrow { + gboolean check(GError **error, + const arrow::Status &status, + const char *context) { + if (status.ok()) { + return TRUE; + } else { + g_set_error(error, + GARROW_ERROR, + garrow_error_from_status(status), + "%s: %s", + context, + status.ToString().c_str()); + return FALSE; + } + } +} diff --git a/src/arrow/c_glib/arrow-glib/error.h b/src/arrow/c_glib/arrow-glib/error.h new file mode 100644 index 000000000..d60066359 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/error.h @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib-object.h> + +G_BEGIN_DECLS + +/** + * GArrowError: + * @GARROW_ERROR_OUT_OF_MEMORY: Out of memory error. + * @GARROW_ERROR_KEY: Key error. + * @GARROW_ERROR_TYPE: Type error. + * @GARROW_ERROR_INVALID: Invalid value error. + * @GARROW_ERROR_IO: IO error. + * @GARROW_ERROR_CAPACITY: Capacity error. + * @GARROW_ERROR_INDEX: Index error. + * @GARROW_ERROR_UNKNOWN: Unknown error. + * @GARROW_ERROR_NOT_IMPLEMENTED: The feature is not implemented. + * @GARROW_ERROR_SERIALIZATION: Serialization error. + * @GARROW_ERROR_CODE_GENERATION: Error generating code for expression evaluation + * in Gandiva. + * @GARROW_ERROR_EXPRESSION_VALIDATION: Validation errors in expression given for code generation. + * @GARROW_ERROR_EXECUTION: Execution error while evaluating the expression against a record batch. + * @GARROW_ERROR_ALREADY_EXISTS: Item already exists error. + * + * The error codes are used by all arrow-glib functions. + * + * They are corresponding to `arrow::Status` values. 
+ */ +typedef enum { + GARROW_ERROR_OUT_OF_MEMORY = 1, + GARROW_ERROR_KEY, + GARROW_ERROR_TYPE, + GARROW_ERROR_INVALID, + GARROW_ERROR_IO, + GARROW_ERROR_CAPACITY, + GARROW_ERROR_INDEX, + GARROW_ERROR_UNKNOWN = 9, + GARROW_ERROR_NOT_IMPLEMENTED, + GARROW_ERROR_SERIALIZATION, + GARROW_ERROR_CODE_GENERATION = 40, + GARROW_ERROR_EXPRESSION_VALIDATION = 41, + GARROW_ERROR_EXECUTION = 42, + GARROW_ERROR_ALREADY_EXISTS = 45, +} GArrowError; + +#define GARROW_ERROR garrow_error_quark() + +GQuark garrow_error_quark(void); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/error.hpp b/src/arrow/c_glib/arrow-glib/error.hpp new file mode 100644 index 000000000..446342108 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/error.hpp @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/error.h> + +gboolean garrow_error_check(GError **error, + const arrow::Status &status, + const char *context); +GArrowError garrow_error_from_status(const arrow::Status &status); +arrow::StatusCode +garrow_error_to_status_code(GError *error, + arrow::StatusCode default_code); +arrow::Status garrow_error_to_status(GError *error, + arrow::StatusCode default_code, + const char *context); + +namespace garrow { + gboolean check(GError **error, + const arrow::Status &status, + const char *context); + + template <typename CONTEXT_FUNC> + gboolean check(GError **error, + const arrow::Status &status, + CONTEXT_FUNC &&context_func) { + if (status.ok()) { + return TRUE; + } else { + std::string context = std::move(context_func()); + g_set_error(error, + GARROW_ERROR, + garrow_error_from_status(status), + "%s: %s", + context.c_str(), + status.ToString().c_str()); + return FALSE; + } + } + + template <typename TYPE> + gboolean check(GError **error, + const arrow::Result<TYPE> &result, + const char *context) { + return check(error, result.status(), context); + } + + template <typename TYPE, typename CONTEXT_FUNC> + gboolean check(GError **error, + const arrow::Result<TYPE> &result, + CONTEXT_FUNC &&context_func) { + return check(error, result.status(), context_func); + } +} diff --git a/src/arrow/c_glib/arrow-glib/expression.cpp b/src/arrow/c_glib/arrow-glib/expression.cpp new file mode 100644 index 000000000..406e121cd --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/expression.cpp @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/compute.hpp> +#include <arrow-glib/datum.hpp> +#include <arrow-glib/expression.hpp> +#include <arrow-glib/error.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: expression + * @section_id: expression + * @title: Expression + * @include: arrow-glib/arrow-glib.h + * + * #GArrowExpression is a base class for all expression classes such + * as #GArrowLiteralExpression. + * + * #GArrowLiteralExpression is a class for literal value. + * + * #GArrowFieldExpression is a class for field reference. + * + * #GArrowCallExpression is a class for function call. + * + * Since: 6.0.0 + */ + +typedef struct GArrowExpressionPrivate_ { + arrow::compute::Expression expression; +} GArrowExpressionPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowExpression, + garrow_expression, + G_TYPE_OBJECT) + +#define GARROW_EXPRESSION_GET_PRIVATE(object) \ + static_cast<GArrowExpressionPrivate *>( \ + garrow_expression_get_instance_private( \ + GARROW_EXPRESSION(object))) + +static void +garrow_expression_finalize(GObject *object) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(object); + priv->expression.~Expression(); + G_OBJECT_CLASS(garrow_expression_parent_class)->finalize(object); +} + +static void +garrow_expression_init(GArrowExpression *object) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(object); + new(&priv->expression) arrow::compute::Expression(); +} + +static void +garrow_expression_class_init(GArrowExpressionClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_expression_finalize; +} + +/** + 
* garrow_expression_to_string: + * @expression: A #GArrowExpression. + * + * Returns: The formatted expression. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 6.0.0 + */ +gchar * +garrow_expression_to_string(GArrowExpression *expression) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(expression); + auto string = priv->expression.ToString(); + return g_strndup(string.data(), string.size()); +} + +/** + * garrow_expression_equal: + * @expression: A #GArrowExpression. + * @other_expression: A #GArrowExpression. + * + * Returns: %TRUE if both of them have the same content, %FALSE + * otherwise. + * + * Since: 6.0.0 + */ +gboolean +garrow_expression_equal(GArrowExpression *expression, + GArrowExpression *other_expression) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(expression); + auto other_priv = GARROW_EXPRESSION_GET_PRIVATE(other_expression); + return priv->expression.Equals(other_priv->expression); +} + + +G_DEFINE_TYPE(GArrowLiteralExpression, + garrow_literal_expression, + GARROW_TYPE_EXPRESSION) + +static void +garrow_literal_expression_init(GArrowLiteralExpression *object) +{ +} + +static void +garrow_literal_expression_class_init(GArrowLiteralExpressionClass *klass) +{ +} + +/** + * garrow_literal_expression_new: + * @datum: A #GArrowDatum. + * + * Returns: A newly created #GArrowLiteralExpression. 
+ * + * Since: 6.0.0 + */ +GArrowLiteralExpression * +garrow_literal_expression_new(GArrowDatum *datum) +{ + auto arrow_datum = garrow_datum_get_raw(datum); + auto arrow_expression = arrow::compute::literal(arrow_datum); + return GARROW_LITERAL_EXPRESSION(garrow_expression_new_raw(arrow_expression)); +} + + +G_DEFINE_TYPE(GArrowFieldExpression, + garrow_field_expression, + GARROW_TYPE_EXPRESSION) + +static void +garrow_field_expression_init(GArrowFieldExpression *object) +{ +} + +static void +garrow_field_expression_class_init(GArrowFieldExpressionClass *klass) +{ +} + +/** + * garrow_field_expression_new: + * @reference: A field name or dot path. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowFieldExpression on sucess, %NULL on + * error. + * + * Since: 6.0.0 + */ +GArrowFieldExpression * +garrow_field_expression_new(const gchar *reference, + GError **error) +{ + if (reference && reference[0] == '.') { + auto arrow_reference_result = arrow::FieldRef::FromDotPath(reference); + if (!garrow::check(error, + arrow_reference_result, + "[field-expression][new]")) { + return NULL; + } + auto arrow_expression = arrow::compute::field_ref(*arrow_reference_result); + return GARROW_FIELD_EXPRESSION(garrow_expression_new_raw(arrow_expression)); + } else { + arrow::FieldRef arrow_reference(reference); + auto arrow_expression = arrow::compute::field_ref(arrow_reference); + return GARROW_FIELD_EXPRESSION(garrow_expression_new_raw(arrow_expression)); + } +} + + +G_DEFINE_TYPE(GArrowCallExpression, + garrow_call_expression, + GARROW_TYPE_EXPRESSION) + +static void +garrow_call_expression_init(GArrowCallExpression *object) +{ +} + +static void +garrow_call_expression_class_init(GArrowCallExpressionClass *klass) +{ +} + +/** + * garrow_call_expression_new: + * @function: A name of function to be called. + * @arguments: (element-type GArrowExpression): Arguments of this call. 
+ * @options: (nullable): A #GArrowFunctionOptions for the called function. + * + * Returns: A newly created #GArrowCallExpression. + * + * Since: 6.0.0 + */ +GArrowCallExpression * +garrow_call_expression_new(const gchar *function, + GList *arguments, + GArrowFunctionOptions *options) +{ + std::vector<arrow::compute::Expression> arrow_arguments; + for (GList *node = arguments; node; node = node->next) { + auto argument = GARROW_EXPRESSION(node->data); + auto arrow_argument = garrow_expression_get_raw(argument); + arrow_arguments.push_back(*arrow_argument); + } + std::shared_ptr<arrow::compute::FunctionOptions> arrow_options; + if (options) { + arrow_options.reset(garrow_function_options_get_raw(options)); + } + auto arrow_expression = arrow::compute::call(function, + arrow_arguments, + arrow_options); + return GARROW_CALL_EXPRESSION(garrow_expression_new_raw(arrow_expression)); +} + + +G_END_DECLS + +GArrowExpression * +garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression) +{ + GType gtype = GARROW_TYPE_EXPRESSION; + if (arrow_expression.literal()) { + gtype = GARROW_TYPE_LITERAL_EXPRESSION; + } else if (arrow_expression.parameter()) { + gtype = GARROW_TYPE_FIELD_EXPRESSION; + } else if (arrow_expression.call()) { + gtype = GARROW_TYPE_CALL_EXPRESSION; + } + auto expression = GARROW_EXPRESSION(g_object_new(gtype, NULL)); + auto priv = GARROW_EXPRESSION_GET_PRIVATE(expression); + priv->expression = arrow_expression; + return expression; +} + +arrow::compute::Expression * +garrow_expression_get_raw(GArrowExpression *expression) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(expression); + return &(priv->expression); +} diff --git a/src/arrow/c_glib/arrow-glib/expression.h b/src/arrow/c_glib/arrow-glib/expression.h new file mode 100644 index 000000000..1c1a5fbab --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/expression.h @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/compute.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_EXPRESSION (garrow_expression_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExpression, + garrow_expression, + GARROW, + EXPRESSION, + GObject) +struct _GArrowExpressionClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +gchar * +garrow_expression_to_string(GArrowExpression *expression); +GARROW_AVAILABLE_IN_6_0 +gboolean +garrow_expression_equal(GArrowExpression *expression, + GArrowExpression *other_expression); + + +#define GARROW_TYPE_LITERAL_EXPRESSION (garrow_literal_expression_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLiteralExpression, + garrow_literal_expression, + GARROW, + LITERAL_EXPRESSION, + GArrowExpression) +struct _GArrowLiteralExpressionClass +{ + GArrowExpressionClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowLiteralExpression * +garrow_literal_expression_new(GArrowDatum *datum); + + +#define GARROW_TYPE_FIELD_EXPRESSION (garrow_field_expression_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFieldExpression, + garrow_field_expression, + GARROW, + FIELD_EXPRESSION, + GArrowExpression) +struct _GArrowFieldExpressionClass +{ + GArrowExpressionClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowFieldExpression * 
+garrow_field_expression_new(const gchar *reference, + GError **error); + + +#define GARROW_TYPE_CALL_EXPRESSION (garrow_call_expression_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCallExpression, + garrow_call_expression, + GARROW, + CALL_EXPRESSION, + GArrowExpression) +struct _GArrowCallExpressionClass +{ + GArrowExpressionClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowCallExpression * +garrow_call_expression_new(const gchar *function, + GList *arguments, + GArrowFunctionOptions *options); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/expression.hpp b/src/arrow/c_glib/arrow-glib/expression.hpp new file mode 100644 index 000000000..ea872bb53 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/expression.hpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/compute/exec/expression.h> + +#include <arrow-glib/expression.h> + + +GArrowExpression * +garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression); +arrow::compute::Expression * +garrow_expression_get_raw(GArrowExpression *expression); diff --git a/src/arrow/c_glib/arrow-glib/field.cpp b/src/arrow/c_glib/arrow-glib/field.cpp new file mode 100644 index 000000000..1bb2dd181 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/field.cpp @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/field.hpp> +#include <arrow-glib/internal-hash-table.hpp> + +#include <arrow/c/bridge.h> + +G_BEGIN_DECLS + +/** + * SECTION: field + * @short_description: Field class + * + * #GArrowField is a class for field. Field is metadata of a + * column. It has name, data type (#GArrowDataType) and nullable + * information of the column. 
+ */ + +typedef struct GArrowFieldPrivate_ { + std::shared_ptr<arrow::Field> field; + GArrowDataType *data_type; +} GArrowFieldPrivate; + +enum { + PROP_FIELD = 1, + PROP_DATA_TYPE +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowField, + garrow_field, + G_TYPE_OBJECT) + +#define GARROW_FIELD_GET_PRIVATE(obj) \ + static_cast<GArrowFieldPrivate *>( \ + garrow_field_get_instance_private( \ + GARROW_FIELD(obj))) + +static void +garrow_field_dispose(GObject *object) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); + + if (priv->data_type) { + g_object_unref(priv->data_type); + priv->data_type = nullptr; + } + + G_OBJECT_CLASS(garrow_field_parent_class)->dispose(object); +} + +static void +garrow_field_finalize(GObject *object) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); + + priv->field.~shared_ptr(); + + G_OBJECT_CLASS(garrow_field_parent_class)->finalize(object); +} + +static void +garrow_field_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FIELD: + priv->field = + *static_cast<std::shared_ptr<arrow::Field> *>(g_value_get_pointer(value)); + break; + case PROP_DATA_TYPE: + priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_field_init(GArrowField *object) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); + new(&priv->field) std::shared_ptr<arrow::Field>; +} + +static void +garrow_field_class_init(GArrowFieldClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_field_dispose; + gobject_class->finalize = garrow_field_finalize; + gobject_class->set_property = garrow_field_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("field", + "Field", + "The raw std::shared<arrow::Field> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + 
g_object_class_install_property(gobject_class, PROP_FIELD, spec); + + spec = g_param_spec_object("data-type", + "Data type", + "The data type", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); +} + +/** + * garrow_field_import: + * @c_abi_schema: (not nullable): A `struct ArrowSchema *`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An imported #GArrowField on success, + * %NULL on error. + * + * You don't need to release the passed `struct ArrowSchema *`, + * even if this function reports an error. + * + * Since: 6.0.0 + */ +GArrowField * +garrow_field_import(gpointer c_abi_schema, GError **error) +{ + auto arrow_field_result = + arrow::ImportField(static_cast<ArrowSchema *>(c_abi_schema)); + if (garrow::check(error, arrow_field_result, "[field][import]")) { + return garrow_field_new_raw(&(*arrow_field_result), nullptr); + } else { + return NULL; + } +} + +/** + * garrow_field_new: + * @name: The name of the field. + * @data_type: The data type of the field. + * + * Returns: A newly created #GArrowField. + */ +GArrowField * +garrow_field_new(const gchar *name, + GArrowDataType *data_type) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_field = std::make_shared<arrow::Field>(name, arrow_data_type); + return garrow_field_new_raw(&arrow_field, data_type); +} + +/** + * garrow_field_new_full: + * @name: The name of the field. + * @data_type: The data type of the field. + * @nullable: Whether null may be included or not. + * + * Returns: A newly created #GArrowField. 
+ */ +GArrowField * +garrow_field_new_full(const gchar *name, + GArrowDataType *data_type, + gboolean nullable) +{ + auto arrow_field = + std::make_shared<arrow::Field>(name, + garrow_data_type_get_raw(data_type), + nullable); + return garrow_field_new_raw(&arrow_field, data_type); +} + +/** + * garrow_field_export: + * @field: A #GArrowField. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An exported #GArrowField as + * `struct ArrowSchema *` on success, %NULL on error. + * + * It should be freed with the `ArrowSchema::release` callback then + * g_free() when no longer needed. + * + * Since: 6.0.0 + */ +gpointer +garrow_field_export(GArrowField *field, GError **error) +{ + const auto arrow_field = garrow_field_get_raw(field); + auto c_abi_schema = g_new(ArrowSchema, 1); + auto status = arrow::ExportField(*arrow_field, c_abi_schema); + if (garrow::check(error, status, "[field][export]")) { + return c_abi_schema; + } else { + g_free(c_abi_schema); + return NULL; + } +} + +/** + * garrow_field_get_name: + * @field: A #GArrowField. + * + * Returns: The name of the field. + */ +const gchar * +garrow_field_get_name(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + return arrow_field->name().c_str(); +} + +/** + * garrow_field_get_data_type: + * @field: A #GArrowField. + * + * Returns: (transfer none): The data type of the field. + */ +GArrowDataType * +garrow_field_get_data_type(GArrowField *field) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(field); + return priv->data_type; +} + +/** + * garrow_field_is_nullable: + * @field: A #GArrowField. + * + * Returns: Whether the field may include null or not. + */ +gboolean +garrow_field_is_nullable(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + return arrow_field->nullable(); +} + +/** + * garrow_field_equal: + * @field: A #GArrowField. + * @other_field: A #GArrowField to be compared. 
+ * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + */ +gboolean +garrow_field_equal(GArrowField *field, + GArrowField *other_field) +{ + const auto arrow_field = garrow_field_get_raw(field); + const auto arrow_other_field = garrow_field_get_raw(other_field); + return arrow_field->Equals(arrow_other_field); +} + +/** + * garrow_field_to_string: + * @field: A #GArrowField. + * + * Returns: The string representation of the field. + */ +gchar * +garrow_field_to_string(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + return g_strdup(arrow_field->ToString().c_str()); +} + +/** + * garrow_field_to_string_metadata: + * @field: A #GArrowField. + * @show_metadata: Whether include metadata or not. + * + * Returns: The string representation of the field. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 3.0.0 + */ +gchar * +garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata) +{ + const auto arrow_field = garrow_field_get_raw(field); + return g_strdup(arrow_field->ToString(show_metadata).c_str()); +} + +/** + * garrow_field_has_metadata: + * @field: A #GArrowField. + * + * Returns: %TRUE if the field has metadata, %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_field_has_metadata(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + return arrow_field->HasMetadata(); +} + +/** + * garrow_field_get_metadata: + * @field: A #GArrowField. + * + * Returns: (element-type utf8 utf8) (nullable) (transfer full): The + * metadata in the field. + * + * It should be freed with g_hash_table_unref() when no longer needed. 
+ * + * Since: 3.0.0 + */ +GHashTable * +garrow_field_get_metadata(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + if (!arrow_field->HasMetadata()) { + return NULL; + } + + auto arrow_metadata = arrow_field->metadata(); + /* The result is (transfer full): the table owns copies of every key + * and value so that it doesn't point into the metadata storage of + * @field and stays valid after @field is released. */ + auto metadata = g_hash_table_new_full(g_str_hash, + g_str_equal, + g_free, + g_free); + const auto n = arrow_metadata->size(); + for (int64_t i = 0; i < n; ++i) { + g_hash_table_insert(metadata, + g_strdup(arrow_metadata->key(i).c_str()), + g_strdup(arrow_metadata->value(i).c_str())); + } + return metadata; +} + +/** + * garrow_field_with_metadata: + * @field: A #GArrowField. + * @metadata: (element-type utf8 utf8): A new associated metadata. + * + * Returns: (transfer full): The new field with the given metadata. + * + * Since: 3.0.0 + */ +GArrowField * +garrow_field_with_metadata(GArrowField *field, + GHashTable *metadata) +{ + const auto arrow_field = garrow_field_get_raw(field); + auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata); + auto arrow_new_field = arrow_field->WithMetadata(arrow_metadata); + return garrow_field_new_raw(&arrow_new_field, + garrow_field_get_data_type(field)); +} + +/** + * garrow_field_with_merged_metadata: + * @field: A #GArrowField. + * @metadata: (element-type utf8 utf8): An additional associated metadata. + * + * Returns: (transfer full): The new field that also has the given + * metadata. If both of the existing metadata and the given metadata + * have the same keys, the values in the given metadata are used. 
+ * + * Since: 3.0.0 + */ +GArrowField * +garrow_field_with_merged_metadata(GArrowField *field, + GHashTable *metadata) +{ + const auto arrow_field = garrow_field_get_raw(field); + auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata); + auto arrow_new_field = arrow_field->WithMergedMetadata(arrow_metadata); + return garrow_field_new_raw(&arrow_new_field, + garrow_field_get_data_type(field)); +} + +/** + * garrow_field_remove_metadata: + * @field: A #GArrowField. + * + * Returns: (transfer full): The new field that doesn't have metadata. + * + * Since: 3.0.0 + */ +GArrowField * +garrow_field_remove_metadata(GArrowField *field) +{ + const auto arrow_field = garrow_field_get_raw(field); + auto arrow_new_field = arrow_field->RemoveMetadata(); + return garrow_field_new_raw(&arrow_new_field, + garrow_field_get_data_type(field)); +} + +G_END_DECLS + +GArrowField * +garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field, + GArrowDataType *data_type) +{ + bool data_type_need_unref = false; + if (!data_type) { + auto arrow_data_type = (*arrow_field)->type(); + data_type = garrow_data_type_new_raw(&arrow_data_type); + data_type_need_unref = true; + } + auto field = GARROW_FIELD(g_object_new(GARROW_TYPE_FIELD, + "field", arrow_field, + "data-type", data_type, + NULL)); + if (data_type_need_unref) { + g_object_unref(data_type); + } + return field; +} + +std::shared_ptr<arrow::Field> +garrow_field_get_raw(GArrowField *field) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(field); + return priv->field; +} diff --git a/src/arrow/c_glib/arrow-glib/field.h b/src/arrow/c_glib/arrow-glib/field.h new file mode 100644 index 000000000..60689c36e --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/field.h @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/basic-data-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_FIELD (garrow_field_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowField, + garrow_field, + GARROW, + FIELD, + GObject) +struct _GArrowFieldClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowField * +garrow_field_import(gpointer c_abi_schema, + GError **error); + +GArrowField *garrow_field_new (const gchar *name, + GArrowDataType *data_type); +GArrowField *garrow_field_new_full (const gchar *name, + GArrowDataType *data_type, + gboolean nullable); + +GARROW_AVAILABLE_IN_6_0 +gpointer +garrow_field_export(GArrowField *field, + GError **error); + +const gchar *garrow_field_get_name (GArrowField *field); +GArrowDataType *garrow_field_get_data_type (GArrowField *field); +gboolean garrow_field_is_nullable (GArrowField *field); + +gboolean garrow_field_equal (GArrowField *field, + GArrowField *other_field); + +gchar * +garrow_field_to_string(GArrowField *field); +GARROW_AVAILABLE_IN_3_0 +gchar * +garrow_field_to_string_metadata(GArrowField *field, + gboolean show_metadata); + +GARROW_AVAILABLE_IN_3_0 +gboolean +garrow_field_has_metadata(GArrowField *field); +GARROW_AVAILABLE_IN_3_0 +GHashTable * +garrow_field_get_metadata(GArrowField *field); +GARROW_AVAILABLE_IN_3_0 +GArrowField * +garrow_field_with_metadata(GArrowField *field, + GHashTable *metadata); 
+GARROW_AVAILABLE_IN_3_0 +GArrowField * +garrow_field_with_merged_metadata(GArrowField *field, + GHashTable *metadata); +GARROW_AVAILABLE_IN_3_0 +GArrowField * +garrow_field_remove_metadata(GArrowField *field); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/field.hpp b/src/arrow/c_glib/arrow-glib/field.hpp new file mode 100644 index 000000000..f8d0d46c9 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/field.hpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/field.h> + +GArrowField *garrow_field_new_raw(std::shared_ptr<arrow::Field> *arrow_field, + GArrowDataType *data_type); +std::shared_ptr<arrow::Field> garrow_field_get_raw(GArrowField *field); diff --git a/src/arrow/c_glib/arrow-glib/file-mode.cpp b/src/arrow/c_glib/arrow-glib/file-mode.cpp new file mode 100644 index 000000000..dba31eac8 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-mode.cpp @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/file-mode.hpp> + +/** + * SECTION: file-mode + * @title: GArrowFileMode + * @short_description: File mode mapping between Arrow and arrow-glib + * + * #GArrowFileMode provides file modes corresponding to + * `arrow::io::FileMode::type` values. + */ + +GArrowFileMode +garrow_file_mode_from_raw(arrow::io::FileMode::type mode) +{ + switch (mode) { + case arrow::io::FileMode::type::READ: + return GARROW_FILE_MODE_READ; + case arrow::io::FileMode::type::WRITE: + return GARROW_FILE_MODE_WRITE; + case arrow::io::FileMode::type::READWRITE: + return GARROW_FILE_MODE_READWRITE; + default: + return GARROW_FILE_MODE_READ; + } +} + +arrow::io::FileMode::type +garrow_file_mode_to_raw(GArrowFileMode mode) +{ + switch (mode) { + case GARROW_FILE_MODE_READ: + return arrow::io::FileMode::type::READ; + case GARROW_FILE_MODE_WRITE: + return arrow::io::FileMode::type::WRITE; + case GARROW_FILE_MODE_READWRITE: + return arrow::io::FileMode::type::READWRITE; + default: + return arrow::io::FileMode::type::READ; + } +} diff --git a/src/arrow/c_glib/arrow-glib/file-mode.h b/src/arrow/c_glib/arrow-glib/file-mode.h new file mode 100644 index 000000000..8812af805 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-mode.h @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib-object.h> + +G_BEGIN_DECLS + +/** + * GArrowFileMode: + * @GARROW_FILE_MODE_READ: For read. + * @GARROW_FILE_MODE_WRITE: For write. + * @GARROW_FILE_MODE_READWRITE: For read-write. + * + * They are corresponding to `arrow::io::FileMode::type` values. + */ +typedef enum { + GARROW_FILE_MODE_READ, + GARROW_FILE_MODE_WRITE, + GARROW_FILE_MODE_READWRITE +} GArrowFileMode; + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/file-mode.hpp b/src/arrow/c_glib/arrow-glib/file-mode.hpp new file mode 100644 index 000000000..2b6737942 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-mode.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/interfaces.h> + +#include <arrow-glib/file-mode.h> + +GArrowFileMode garrow_file_mode_from_raw(arrow::io::FileMode::type mode); +arrow::io::FileMode::type garrow_file_mode_to_raw(GArrowFileMode mode); diff --git a/src/arrow/c_glib/arrow-glib/file-system.cpp b/src/arrow/c_glib/arrow-glib/file-system.cpp new file mode 100644 index 000000000..2c2c36e74 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-system.cpp @@ -0,0 +1,1450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/enums.h> + +#include <arrow-glib/error.hpp> +#include <arrow-glib/file-system.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/local-file-system.h> +#include <arrow-glib/output-stream.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: file-system + * @section_id: file-system-classes + * @title: File system classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowFileInfo is a class for information for a file system entry. + * + * #GArrowFileSelector is a class for a selector for file system APIs. + * + * #GArrowFileSystem is an interface for file system. + * + * #GArrowSubTreeFileSystem is a delegator to another file system that is + * a logical view of a subtree of a file system, such as a directory in + * a local file system. + * + * #GArrowSlowFileSystem is a delegator to another file system. + * This inserts latencies at various points. + * + * #GArrowMockFileSystem is a class for mock file system that holds + * its contents in memory. + * + * #GArrowHDFSFileSystem is a class for HDFS-backed file system. + * + * #GArrowS3FileSystem is a class for S3-backed file system. 
+ */ + +/* arrow::fs::FileInfo */ + +typedef struct GArrowFileInfoPrivate_ { + arrow::fs::FileInfo file_info; +} GArrowFileInfoPrivate; + +enum { + PROP_FILE_INFO_TYPE = 1, + PROP_FILE_INFO_PATH, + PROP_FILE_INFO_BASE_NAME, + PROP_FILE_INFO_DIR_NAME, + PROP_FILE_INFO_EXTENSION, + PROP_FILE_INFO_SIZE, + PROP_FILE_INFO_MTIME, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFileInfo, garrow_file_info, G_TYPE_OBJECT) + +#define GARROW_FILE_INFO_GET_PRIVATE(obj) \ + static_cast<GArrowFileInfoPrivate *>( \ + garrow_file_info_get_instance_private( \ + GARROW_FILE_INFO(obj))) + +static void +garrow_file_info_finalize(GObject *object) +{ + auto priv = GARROW_FILE_INFO_GET_PRIVATE(object); + + priv->file_info.~FileInfo(); + + G_OBJECT_CLASS(garrow_file_info_parent_class)->finalize(object); +} + +static void +garrow_file_info_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto arrow_file_info = garrow_file_info_get_raw(GARROW_FILE_INFO(object)); + + switch (prop_id) { + case PROP_FILE_INFO_TYPE: + { + auto arrow_file_type = + static_cast<arrow::fs::FileType>(g_value_get_enum(value)); + arrow_file_info->set_type(arrow_file_type); + } + break; + case PROP_FILE_INFO_PATH: + arrow_file_info->set_path(g_value_get_string(value)); + break; + case PROP_FILE_INFO_SIZE: + arrow_file_info->set_size(g_value_get_int64(value)); + break; + case PROP_FILE_INFO_MTIME: + { + const gint64 mtime = g_value_get_int64(value); + const arrow::fs::TimePoint::duration duration(mtime); + arrow_file_info->set_mtime(arrow::fs::TimePoint(duration)); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_file_info_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + const auto arrow_file_info = + garrow_file_info_get_raw(GARROW_FILE_INFO(object)); + + switch (prop_id) { + case PROP_FILE_INFO_TYPE: + { + const auto arrow_file_type = arrow_file_info->type(); + 
const auto file_type = static_cast<GArrowFileType>(arrow_file_type); + g_value_set_enum(value, file_type); + } + break; + case PROP_FILE_INFO_PATH: + g_value_set_string(value, arrow_file_info->path().c_str()); + break; + case PROP_FILE_INFO_BASE_NAME: + g_value_set_string(value, arrow_file_info->base_name().c_str()); + break; + case PROP_FILE_INFO_DIR_NAME: + g_value_set_string(value, arrow_file_info->dir_name().c_str()); + break; + case PROP_FILE_INFO_EXTENSION: + g_value_set_string(value, arrow_file_info->extension().c_str()); + break; + case PROP_FILE_INFO_SIZE: + g_value_set_int64(value, arrow_file_info->size()); + break; + case PROP_FILE_INFO_MTIME: + { + const auto arrow_mtime = arrow_file_info->mtime(); + const auto mtime = arrow_mtime.time_since_epoch().count(); + g_value_set_int64(value, mtime); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_file_info_init(GArrowFileInfo *object) +{ + auto priv = GARROW_FILE_INFO_GET_PRIVATE(object); + new(&priv->file_info) arrow::fs::FileInfo; +} + +static void +garrow_file_info_class_init(GArrowFileInfoClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_file_info_finalize; + gobject_class->set_property = garrow_file_info_set_property; + gobject_class->get_property = garrow_file_info_get_property; + + auto info = arrow::fs::FileInfo(); + + /** + * GArrowFileInfo:type: + * + * The type of the entry. + * + * Since: 0.17.0 + */ + spec = g_param_spec_enum("type", + "Type", + "The type of the entry", + GARROW_TYPE_FILE_TYPE, + GARROW_FILE_TYPE_UNKNOWN, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_INFO_TYPE, spec); + + /** + * GArrowFileInfo:path: + * + * The full file path in the file system. 
+ * + * Since: 0.17.0 + */ + spec = g_param_spec_string("path", + "Path", + "The full file path", + info.path().c_str(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_INFO_PATH, spec); + + /** + * GArrowFileInfo:base-name: + * + * The file base name (component after the last directory separator). + * + * Since: 0.17.0 + */ + spec = g_param_spec_string("base-name", + "Base name", + "The file base name", + info.base_name().c_str(), + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, + PROP_FILE_INFO_BASE_NAME, + spec); + + /** + * GArrowFileInfo:dir-name: + * + * The directory base name (component before the file base name). + * + * Since: 0.17.0 + */ + spec = g_param_spec_string("dir-name", + "Directory name", + "The directory base name", + info.dir_name().c_str(), + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, + PROP_FILE_INFO_DIR_NAME, + spec); + + /** + * GArrowFileInfo:extension: + * + * The file extension (excluding the dot). + * + * Since: 0.17.0 + */ + spec = g_param_spec_string("extension", + "Extension", + "The file extension", + info.extension().c_str(), + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, + PROP_FILE_INFO_EXTENSION, + spec); + + /** + * GArrowFileInfo:size: + * + * The size in bytes, if available + * Only regular files are guaranteed to have a size. + * + * Since: 0.17.0 + */ + spec = g_param_spec_int64("size", + "Size", + "The size in bytes", + arrow::fs::kNoSize, + INT64_MAX, + info.size(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_INFO_SIZE, spec); + + /** + * GArrowFileInfo:mtime: + * + * The time of last modification, if available. 
+ * + * Since: 0.17.0 + */ + spec = g_param_spec_int64("mtime", + "Last modified time", + "The time of last modification", + arrow::fs::kNoTime.time_since_epoch().count(), + INT64_MAX, + info.mtime().time_since_epoch().count(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_FILE_INFO_MTIME, spec); +} + +/** + * garrow_file_info_new: + * + * Returns: A newly created #GArrowFileInfo. + * + * Since: 0.17.0 + */ +GArrowFileInfo * +garrow_file_info_new(void) +{ + return GARROW_FILE_INFO(g_object_new(GARROW_TYPE_FILE_INFO, NULL)); +} + +/** + * garrow_file_info_equal: + * @file_info: A #GArrowFileInfo. + * @other_file_info: A #GArrowFileInfo to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_info_equal(GArrowFileInfo *file_info, + GArrowFileInfo *other_file_info) +{ + const auto arrow_file_info = garrow_file_info_get_raw(file_info); + const auto arrow_other_file_info = garrow_file_info_get_raw(other_file_info); + return arrow_file_info->Equals(*arrow_other_file_info); +} + +/** + * garrow_file_info_is_file: + * @file_info: A #GArrowFileInfo. + * + * Returns: %TRUE if the entry is a file, %FALSE otherwise. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_info_is_file(GArrowFileInfo *file_info) +{ + const auto arrow_file_info = garrow_file_info_get_raw(file_info); + return arrow_file_info->IsFile(); +} + +/** + * garrow_file_info_is_dir + * @file_info: A #GArrowFileInfo. + * + * Returns: %TRUE if the entry is a directory, %FALSE otherwise. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_info_is_dir(GArrowFileInfo *file_info) +{ + const auto arrow_file_info = garrow_file_info_get_raw(file_info); + return arrow_file_info->IsDirectory(); +} + +/** + * garrow_file_info_to_string: + * @file_info: A #GArrowFileInfo. + * + * Returns: The string representation of the file statistics. 
+ * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.17.0 + */ +gchar * +garrow_file_info_to_string(GArrowFileInfo *file_info) +{ + const auto arrow_file_info = garrow_file_info_get_raw(file_info); + return g_strdup(arrow_file_info->ToString().c_str()); +} + +/* arrow::fs::FileSelector */ + +typedef struct GArrowFileSelectorPrivate_ { + arrow::fs::FileSelector file_selector; +} GArrowFileSelectorPrivate; + +enum { + PROP_FILE_SELECTOR_BASE_DIR = 1, + PROP_FILE_SELECTOR_ALLOW_NOT_FOUND, + PROP_FILE_SELECTOR_RECURSIVE, + PROP_FILE_SELECTOR_MAX_RECURSION +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFileSelector, garrow_file_selector, G_TYPE_OBJECT) + +#define GARROW_FILE_SELECTOR_GET_PRIVATE(obj) \ + static_cast<GArrowFileSelectorPrivate *>( \ + garrow_file_selector_get_instance_private( \ + GARROW_FILE_SELECTOR(obj))) + +static void +garrow_file_selector_finalize(GObject *object) +{ + auto priv = GARROW_FILE_SELECTOR_GET_PRIVATE(object); + + priv->file_selector.~FileSelector(); + + G_OBJECT_CLASS(garrow_file_selector_parent_class)->finalize(object); +} + +static void +garrow_file_selector_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FILE_SELECTOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_SELECTOR_BASE_DIR: + priv->file_selector.base_dir = g_value_get_string(value); + break; + case PROP_FILE_SELECTOR_ALLOW_NOT_FOUND: + priv->file_selector.allow_not_found = g_value_get_boolean(value); + break; + case PROP_FILE_SELECTOR_RECURSIVE: + priv->file_selector.recursive = g_value_get_boolean(value); + break; + case PROP_FILE_SELECTOR_MAX_RECURSION: + priv->file_selector.max_recursion = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_file_selector_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_FILE_SELECTOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_SELECTOR_BASE_DIR: + g_value_set_string(value, priv->file_selector.base_dir.c_str()); + break; + case PROP_FILE_SELECTOR_ALLOW_NOT_FOUND: + g_value_set_boolean(value, priv->file_selector.allow_not_found); + break; + case PROP_FILE_SELECTOR_RECURSIVE: + g_value_set_boolean(value, priv->file_selector.recursive); + break; + case PROP_FILE_SELECTOR_MAX_RECURSION: + g_value_set_int(value, priv->file_selector.max_recursion); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_file_selector_init(GArrowFileSelector *object) +{ + auto priv = GARROW_FILE_SELECTOR_GET_PRIVATE(object); + new(&priv->file_selector) arrow::fs::FileSelector; +} + +static void +garrow_file_selector_class_init(GArrowFileSelectorClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_file_selector_finalize; + gobject_class->set_property = garrow_file_selector_set_property; + gobject_class->get_property = garrow_file_selector_get_property; + + auto file_selector = arrow::fs::FileSelector(); + + /** + * GArrowFileSelector:base-dir: + * + * The directory in which to select files. + * If the path exists but doesn't point to a directory, this should + * be an error. + * + * Since: 0.17.0 + */ + spec = g_param_spec_string("base-dir", + "Base dir", + "The directory in which to select files", + file_selector.base_dir.c_str(), + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_FILE_SELECTOR_BASE_DIR, + spec); + + /** + * GArrowFileSelector:allow-not-found: + * + * The behavior if `base_dir` isn't found in the file system. + * If false, an error is returned. If true, an empty selection is returned. 
+ * + * Since: 0.17.0 + */ + spec = g_param_spec_boolean("allow-not-found", + "Allow not found", + "The behavior if `base_dir` isn't found in the file system", + file_selector.allow_not_found, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_FILE_SELECTOR_ALLOW_NOT_FOUND, + spec); + + /** + * GArrowFileSelector:recursive: + * + * Whether to recurse into subdirectories. + * + * Since: 0.17.0 + */ + spec = g_param_spec_boolean("recursive", + "Recursive", + "Whether to recurse into subdirectories", + file_selector.recursive, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_FILE_SELECTOR_RECURSIVE, + spec); + + /** + * GArrowFileSelector:max-recursion: + * + * The maximum number of subdirectories to recurse into. + * + * Since: 0.17.0 + */ + spec = g_param_spec_int("max-recursion", + "Max recursion", + "The maximum number of subdirectories to recurse into", + 0, + INT32_MAX, + file_selector.max_recursion, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_FILE_SELECTOR_MAX_RECURSION, + spec); +} + +/* arrow::fs::FileSystem */ + +typedef struct GArrowFileSystemPrivate_ { + std::shared_ptr<arrow::fs::FileSystem> file_system; +} GArrowFileSystemPrivate; + +enum { + PROP_FILE_SYSTEM = 1 +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowFileSystem, + garrow_file_system, + G_TYPE_OBJECT) + +#define GARROW_FILE_SYSTEM_GET_PRIVATE(obj) \ + static_cast<GArrowFileSystemPrivate *>( \ + garrow_file_system_get_instance_private( \ + GARROW_FILE_SYSTEM(obj))) + +static void +garrow_file_system_finalize(GObject *object) +{ + auto priv = GARROW_FILE_SYSTEM_GET_PRIVATE(object); + + priv->file_system.~shared_ptr(); + + G_OBJECT_CLASS(garrow_file_system_parent_class)->finalize(object); +} + +static void +garrow_file_system_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_FILE_SYSTEM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILE_SYSTEM: + priv->file_system = + *static_cast<std::shared_ptr<arrow::fs::FileSystem> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_file_system_init(GArrowFileSystem *object) +{ + auto priv = GARROW_FILE_SYSTEM_GET_PRIVATE(object); + new(&priv->file_system) std::shared_ptr<arrow::fs::FileSystem>; +} + +static void +garrow_file_system_class_init(GArrowFileSystemClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_file_system_finalize; + gobject_class->set_property = garrow_file_system_set_property; + + spec = g_param_spec_pointer("file-system", + "FileSystem", + "The raw std::shared<arrow::fs::FileSystem> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec); +} + +/** + * garrow_file_system_create: + * @uri: An URI to specify file system with options. If you only have an + * absolute path, g_filename_to_uri() will help you. + * @error: (nullable): Return location for a #GError or %NULL. + * + * This is a factory function to create a specific #GArrowFileSystem + * object. + * + * Returns: (nullable) (transfer full): The newly created file system + * that is an object of a subclass of #GArrowFileSystem. + * + * Since: 3.0.0 + */ +GArrowFileSystem * +garrow_file_system_create(const gchar *uri, GError **error) +{ + auto arrow_file_system_result = arrow::fs::FileSystemFromUri(uri); + if (garrow::check(error, + arrow_file_system_result, + "[file-system][create]")) { + auto arrow_file_system = *arrow_file_system_result; + return garrow_file_system_new_raw(&arrow_file_system); + } else { + return NULL; + } +} + +/** + * garrow_file_system_get_type_name: + * @file_system: A #GArrowFileSystem. 
+ * + * Returns: The name of file system type. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.17.0 + */ +gchar * +garrow_file_system_get_type_name(GArrowFileSystem *file_system) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + const auto &type_name = arrow_file_system->type_name(); + return g_strndup(type_name.data(), type_name.size()); +} + +/** + * garrow_file_system_get_file_info: + * @file_system: A #GArrowFileSystem. + * @path: The path of the target. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Get information for the given target. + * + * Any symlink is automatically dereferenced, recursively. + * A non-existing or unreachable file returns an OK status and has + * a #GArrowFileType of value %GARROW_FILE_TYPE_NOT_FOUND. + * An error status indicates a truly exceptional condition + * (low-level I/O error, etc.). + * + * Returns: (nullable) (transfer full): A #GArrowFileInfo. + * + * Since: 0.17.0 + */ +GArrowFileInfo * +garrow_file_system_get_file_info(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_result = arrow_file_system->GetFileInfo(path); + if (garrow::check(error, arrow_result, "[file-system][get-file-info]")) { + const auto &arrow_file_info = *arrow_result; + return garrow_file_info_new_raw(arrow_file_info); + } else { + return NULL; + } +} + +static inline GList * +garrow_file_infos_new(arrow::Result<std::vector<arrow::fs::FileInfo>>&& arrow_result, + GError **error, + const gchar *context) +{ + if (garrow::check(error, arrow_result, context)) { + auto arrow_file_infos = *arrow_result; + GList *file_infos = NULL; + for (auto arrow_file_info : arrow_file_infos) { + auto file_info = garrow_file_info_new_raw(arrow_file_info); + file_infos = g_list_prepend(file_infos, file_info); + } + return g_list_reverse(file_infos); + } else { + return NULL; + } +} + +/** + * 
garrow_file_system_get_file_infos_paths: + * @file_system: A #GArrowFileSystem. + * @paths: (array length=n_paths): The paths of the targets. + * @n_paths: The number of items in @paths. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Get information same as garrow_file_system_get_file_info() + * for the given many targets at once. + * + * Returns: (element-type GArrowFileInfo) (transfer full): + * A list of #GArrowFileInfo. + * + * Since: 0.17.0 + */ +GList * +garrow_file_system_get_file_infos_paths(GArrowFileSystem *file_system, + const gchar **paths, + gsize n_paths, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + std::vector<std::string> arrow_paths; + for (gsize i = 0; i < n_paths; ++i) { + arrow_paths.push_back(paths[i]); + } + return garrow_file_infos_new(arrow_file_system->GetFileInfo(arrow_paths), + error, + "[file-system][get-file-infos][paths]"); +} + +/** + * garrow_file_system_get_file_infos_selector: + * @file_system: A #GArrowFileSystem. + * @file_selector: A #GArrowFileSelector. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Get information same as garrow_file_system_get_file_info() + * according to a selector. + * + * The selector's base directory will not be part of the results, + * even if it exists. + * + * Returns: (element-type GArrowFileInfo) (transfer full): + * A list of #GArrowFileInfo. + * + * Since: 0.17.0 + */ +GList * +garrow_file_system_get_file_infos_selector(GArrowFileSystem *file_system, + GArrowFileSelector *file_selector, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + const auto &arrow_file_selector = + GARROW_FILE_SELECTOR_GET_PRIVATE(file_selector)->file_selector; + return garrow_file_infos_new(arrow_file_system->GetFileInfo(arrow_file_selector), + error, + "[file-system][get-file-infos][selector]"); +} + +/** + * garrow_file_system_create_dir: + * @file_system: A #GArrowFileSystem. 
+ * @path: The paths of the directory. + * @recursive: Whether creating directory recursively or not. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Create a directory and subdirectories. + * This function succeeds if the directory already exists. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_create_dir(GArrowFileSystem *file_system, + const gchar *path, + gboolean recursive, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->CreateDir(path, recursive); + return garrow::check(error, status, "[file-system][create-dir]"); +} + +/** + * garrow_file_system_delete_dir: + * @file_system: A #GArrowFileSystem. + * @path: The paths of the directory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Delete a directory and its contents, recursively. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_delete_dir(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->DeleteDir(path); + return garrow::check(error, status, "[file-system][delete-dir]"); +} + +/** + * garrow_file_system_delete_dir_contents: + * @file_system: A #GArrowFileSystem. + * @path: The paths of the directory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Delete a directory's contents, recursively. Like + * garrow_file_system_delete_dir(), but doesn't delete the directory + * itself. Passing an empty path (`""`) will wipe the entire file + * system tree. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_delete_dir_contents(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->DeleteDirContents(path); + return garrow::check(error, status, "[file-system][delete-dir-contents]"); +} + +/** + * garrow_file_system_delete_file: + * @file_system: A #GArrowFileSystem. + * @path: The paths of the file to be delete. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Delete a file. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_delete_file(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->DeleteFile(path); + return garrow::check(error, status, "[file-system][delete-file]"); +} + +/** + * garrow_file_system_delete_files: + * @file_system: A #GArrowFileSystem. + * @paths: (array length=n_paths): + * The paths of the files to be delete. + * @n_paths: The number of items in @paths. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Delete many files. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_delete_files(GArrowFileSystem *file_system, + const gchar **paths, + gsize n_paths, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + std::vector<std::string> arrow_paths; + arrow_paths.reserve(n_paths); + for (gsize i = 0; i < n_paths; ++i) { + arrow_paths.emplace_back(paths[i]); + } + auto status = arrow_file_system->DeleteFiles(arrow_paths); + return garrow::check(error, status, "[file-system][delete-files]"); +} + +/** + * garrow_file_system_move: + * @file_system: A #GArrowFileSystem. + * @src: The path of the source file. 
+ * @dest: The path of the destination. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Move / rename a file or a directory. + * If the destination exists: + * - if it is a non-empty directory, an error is returned + * - otherwise, if it has the same type as the source, it is replaced + * - otherwise, behavior is unspecified (implementation-dependent). + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_move(GArrowFileSystem *file_system, + const gchar *src, + const gchar *dest, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->Move(src, dest); + return garrow::check(error, status, "[file-system][move]"); +} + +/** + * garrow_file_system_copy_file: + * @file_system: A #GArrowFileSystem. + * @src: The path of the source file. + * @dest: The path of the destination. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Copy a file. + * If the destination exists and is a directory, an error is returned. + * Otherwise, it is replaced. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.17.0 + */ +gboolean +garrow_file_system_copy_file(GArrowFileSystem *file_system, + const gchar *src, + const gchar *dest, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto status = arrow_file_system->CopyFile(src, dest); + return garrow::check(error, status, "[file-system][copy-file]"); +} + +/** + * garrow_file_system_open_input_stream: + * @file_system: A #GArrowFileSystem. + * @path: The path of the input stream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Open an input stream for sequential reading. + * + * Returns: (nullable) (transfer full): A newly created + * #GArrowInputStream. 
+ * + * Since: 0.17.0 + */ +GArrowInputStream * +garrow_file_system_open_input_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_input_stream = arrow_file_system->OpenInputStream(path); + if (garrow::check(error, + arrow_input_stream, + "[file-system][open-input-stream]")) { + return garrow_input_stream_new_raw(&(*arrow_input_stream)); + } else { + return NULL; + } +} + +/** + * garrow_file_system_open_input_file: + * @file_system: A #GArrowFileSystem. + * @path: The path of the input file. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Open an input file for random access reading. + * + * Returns: (nullable) (transfer full): A newly created + * #GArrowSeekableInputStream. + * + * Since: 0.17.0 + */ +GArrowSeekableInputStream * +garrow_file_system_open_input_file(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_random_access_file = arrow_file_system->OpenInputFile(path); + if (garrow::check(error, + arrow_random_access_file, + "[file-system][open-input-file]")) { + return garrow_seekable_input_stream_new_raw(&(*arrow_random_access_file)); + } else { + return NULL; + } +} + +/** + * garrow_file_system_open_output_stream: + * @file_system: A #GArrowFileSystem. + * @path: The path of the output stream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Open an output stream for sequential writing. + * If the target already exists, the existing data is truncated. + * + * Returns: (nullable) (transfer full): A newly created + * #GArrowOutputStream. 
+ * + * Since: 0.17.0 + */ +GArrowOutputStream * +garrow_file_system_open_output_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_output_stream = arrow_file_system->OpenOutputStream(path); + if (garrow::check(error, + arrow_output_stream, + "[file-system][open-output-stream]")) { + return garrow_output_stream_new_raw(&(*arrow_output_stream)); + } else { + return NULL; + } +} + +/** + * garrow_file_system_open_append_stream: + * @file_system: A #GArrowFileSystem. + * @path: The path of the output stream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Open an output stream for appending. + * If the target doesn't exist, a new empty file is created. + * + * Returns: (nullable) (transfer full): A newly created #GArrowOutputStream + * for appending. + * + * Since: 0.17.0 + */ +GArrowOutputStream * +garrow_file_system_open_append_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error) +{ + auto arrow_file_system = garrow_file_system_get_raw(file_system); + auto arrow_output_stream = arrow_file_system->OpenAppendStream(path); + if (garrow::check(error, + arrow_output_stream, + "[file-system][open-append-stream]")) { + return garrow_output_stream_new_raw(&(*arrow_output_stream)); + } else { + return NULL; + } +} + +/* arrow::fs::SubTreeFileSystem */ + +typedef struct GArrowSubTreeFileSystemPrivate_ { + GArrowFileSystem *base_file_system; +} GArrowSubTreeFileSystemPrivate; + +enum { + PROP_BASE_FILE_SYSTEM = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSubTreeFileSystem, + garrow_sub_tree_file_system, + GARROW_TYPE_FILE_SYSTEM) + +#define GARROW_SUB_TREE_FILE_SYSTEM_GET_PRIVATE(object) \ + static_cast<GArrowSubTreeFileSystemPrivate *>( \ + garrow_sub_tree_file_system_get_instance_private( \ + GARROW_SUB_TREE_FILE_SYSTEM(object))) + +static void +garrow_sub_tree_file_system_dispose(GObject *object) +{ + auto priv = 
GARROW_SUB_TREE_FILE_SYSTEM_GET_PRIVATE(object); + + if (priv->base_file_system) { + g_object_unref(priv->base_file_system); + priv->base_file_system = NULL; + } + + G_OBJECT_CLASS(garrow_sub_tree_file_system_parent_class)->dispose(object); +} + +static void +garrow_sub_tree_file_system_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SUB_TREE_FILE_SYSTEM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BASE_FILE_SYSTEM: + priv->base_file_system = GARROW_FILE_SYSTEM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_sub_tree_file_system_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SUB_TREE_FILE_SYSTEM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BASE_FILE_SYSTEM: + g_value_set_object(value, priv->base_file_system); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_sub_tree_file_system_init(GArrowSubTreeFileSystem *file_system) +{ +} + +static void +garrow_sub_tree_file_system_class_init(GArrowSubTreeFileSystemClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_sub_tree_file_system_dispose; + gobject_class->set_property = garrow_sub_tree_file_system_set_property; + gobject_class->get_property = garrow_sub_tree_file_system_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("base-file-system", + "Base file system", + "The base GArrowFileSystem", + GARROW_TYPE_FILE_SYSTEM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BASE_FILE_SYSTEM, spec); +} + +/** + * garrow_sub_tree_file_system_new: + * @base_path: A base path of the sub tree file system. + * @base_file_system: A #GArrowFileSystem as the base file system. 
+ * + * Returns: (transfer full): A newly created #GArrowSubTreeFileSystem. + * + * Since: 0.17.0 + */ +GArrowSubTreeFileSystem * +garrow_sub_tree_file_system_new(const gchar *base_path, + GArrowFileSystem *base_file_system) +{ + auto arrow_base_file_system = garrow_file_system_get_raw(base_file_system); + auto arrow_sub_tree_file_system = + std::static_pointer_cast<arrow::fs::FileSystem>( + std::make_shared<arrow::fs::SubTreeFileSystem>(base_path, + arrow_base_file_system)); + return garrow_sub_tree_file_system_new_raw(&arrow_sub_tree_file_system, + base_file_system); +} + +/* arrow::fs::SlowFileSystem */ + +typedef struct GArrowSlowFileSystemPrivate_ { + GArrowFileSystem *base_file_system; +} GArrowSlowFileSystemPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSlowFileSystem, + garrow_slow_file_system, + GARROW_TYPE_FILE_SYSTEM) + +#define GARROW_SLOW_FILE_SYSTEM_GET_PRIVATE(object) \ + static_cast<GArrowSlowFileSystemPrivate *>( \ + garrow_slow_file_system_get_instance_private( \ + GARROW_SLOW_FILE_SYSTEM(object))) + +static void +garrow_slow_file_system_dispose(GObject *object) +{ + auto priv = GARROW_SLOW_FILE_SYSTEM_GET_PRIVATE(object); + + if (priv->base_file_system) { + g_object_unref(priv->base_file_system); + priv->base_file_system = NULL; + } + + G_OBJECT_CLASS(garrow_slow_file_system_parent_class)->dispose(object); +} + +static void +garrow_slow_file_system_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SLOW_FILE_SYSTEM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BASE_FILE_SYSTEM: + priv->base_file_system = GARROW_FILE_SYSTEM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_slow_file_system_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SLOW_FILE_SYSTEM_GET_PRIVATE(object); + + switch (prop_id) { + case 
PROP_BASE_FILE_SYSTEM: + g_value_set_object(value, priv->base_file_system); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_slow_file_system_init(GArrowSlowFileSystem *file_system) +{ +} + +static void +garrow_slow_file_system_class_init(GArrowSlowFileSystemClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_slow_file_system_dispose; + gobject_class->set_property = garrow_slow_file_system_set_property; + gobject_class->get_property = garrow_slow_file_system_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("base-file-system", + "Base file system", + "The base GArrowFileSystem", + GARROW_TYPE_FILE_SYSTEM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BASE_FILE_SYSTEM, spec); +} + +/** + * garrow_slow_file_system_new_average_latency: + * @base_file_system: A #GArrowFileSystem as the base file system. + * @average_latency: The average value of the latency. + * + * The latency is normally distributed with a standard deviation of + * @average_latency * 0.1. + * + * The random seed is given by the default random device. + * + * Returns: (transfer full): A newly created #GArrowSlowFileSystem. + * + * Since: 0.17.0 + */ +GArrowSlowFileSystem * +garrow_slow_file_system_new_average_latency(GArrowFileSystem *base_file_system, + gdouble average_latency) +{ + auto arrow_base_file_system = garrow_file_system_get_raw(base_file_system); + auto arrow_slow_file_system = + std::static_pointer_cast<arrow::fs::FileSystem>( + std::make_shared<arrow::fs::SlowFileSystem>(arrow_base_file_system, + average_latency)); + return garrow_slow_file_system_new_raw(&arrow_slow_file_system, + base_file_system); +} + +/** + * garrow_slow_file_system_new_average_latency_and_seed: + * @base_file_system: A #GArrowFileSystem as the base file system. 
+ * @average_latency: The average value of the latency. + * @seed: A random seed. + * + * The latency is normally distributed with a standard deviation of + * @average_latency * 0.1. + * + * Returns: (transfer full): A newly created #GArrowSlowFileSystem. + * + * Since: 0.17.0 + */ +GArrowSlowFileSystem * +garrow_slow_file_system_new_average_latency_and_seed(GArrowFileSystem *base_file_system, + gdouble average_latency, + gint32 seed) +{ + auto arrow_base_file_system = garrow_file_system_get_raw(base_file_system); + auto arrow_slow_file_system = + std::static_pointer_cast<arrow::fs::FileSystem>( + std::make_shared<arrow::fs::SlowFileSystem>(arrow_base_file_system, + average_latency, + seed)); + return garrow_slow_file_system_new_raw(&arrow_slow_file_system, + base_file_system); +} + + +G_DEFINE_TYPE(GArrowMockFileSystem, + garrow_mock_file_system, + GARROW_TYPE_FILE_SYSTEM) + +static void +garrow_mock_file_system_init(GArrowMockFileSystem *file_system) +{ +} + +static void +garrow_mock_file_system_class_init(GArrowMockFileSystemClass *klass) +{ +} + + +G_DEFINE_TYPE(GArrowHDFSFileSystem, + garrow_hdfs_file_system, + GARROW_TYPE_FILE_SYSTEM) + +static void +garrow_hdfs_file_system_init(GArrowHDFSFileSystem *file_system) +{ +} + +static void +garrow_hdfs_file_system_class_init(GArrowHDFSFileSystemClass *klass) +{ +} + + +G_DEFINE_TYPE(GArrowS3FileSystem, + garrow_s3_file_system, + GARROW_TYPE_FILE_SYSTEM) + +static void +garrow_s3_file_system_init(GArrowS3FileSystem *file_system) +{ +} + +static void +garrow_s3_file_system_class_init(GArrowS3FileSystemClass *klass) +{ +} + + +G_END_DECLS + +GArrowFileInfo * +garrow_file_info_new_raw(const arrow::fs::FileInfo &arrow_file_info) +{ + auto file_info = garrow_file_info_new(); + GARROW_FILE_INFO_GET_PRIVATE(file_info)->file_info = arrow_file_info; + return file_info; +} + +arrow::fs::FileInfo * +garrow_file_info_get_raw(GArrowFileInfo *file_info) +{ + auto priv = GARROW_FILE_INFO_GET_PRIVATE(file_info); + return 
&(priv->file_info); +} + +GArrowFileSystem * +garrow_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system) +{ + const auto &type_name = (*arrow_file_system)->type_name(); + + GType file_system_type = GARROW_TYPE_FILE_SYSTEM; + if (type_name == "local") { + file_system_type = GARROW_TYPE_LOCAL_FILE_SYSTEM; + } else if (type_name == "hdfs") { + file_system_type = GARROW_TYPE_HDFS_FILE_SYSTEM; + } else if (type_name == "s3") { + file_system_type = GARROW_TYPE_S3_FILE_SYSTEM; + } else if (type_name == "mock") { + file_system_type = GARROW_TYPE_MOCK_FILE_SYSTEM; + } + + return GARROW_FILE_SYSTEM(g_object_new(file_system_type, + "file-system", arrow_file_system, + NULL)); +} + +std::shared_ptr<arrow::fs::FileSystem> +garrow_file_system_get_raw(GArrowFileSystem *file_system) +{ + auto priv = GARROW_FILE_SYSTEM_GET_PRIVATE(file_system); + return priv->file_system; +} + +GArrowSubTreeFileSystem * +garrow_sub_tree_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system, + GArrowFileSystem *base_file_system) +{ + return GARROW_SUB_TREE_FILE_SYSTEM( + g_object_new(GARROW_TYPE_SUB_TREE_FILE_SYSTEM, + "file-system", arrow_file_system, + "base-file-system", base_file_system, + NULL)); +} + +GArrowSlowFileSystem * +garrow_slow_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system, + GArrowFileSystem *base_file_system) +{ + return GARROW_SLOW_FILE_SYSTEM( + g_object_new(GARROW_TYPE_SLOW_FILE_SYSTEM, + "file-system", arrow_file_system, + "base-file-system", base_file_system, + NULL)); +} diff --git a/src/arrow/c_glib/arrow-glib/file-system.h b/src/arrow/c_glib/arrow-glib/file-system.h new file mode 100644 index 000000000..dc9fba7dd --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-system.h @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib-object.h> + +#include <arrow-glib/input-stream.h> +#include <arrow-glib/output-stream.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +/* arrow::fs::TimePoint */ +typedef gint64 GArrowTimePoint; + +/* arrow::fs::FileType */ + +/** + * GArrowFileType: + * @GARROW_FILE_TYPE_NOT_FOUND: Entry is not found + * @GARROW_FILE_TYPE_UNKNOWN: Entry exists but its type is unknown + * @GARROW_FILE_TYPE_FILE: Entry is a regular file + * @GARROW_FILE_TYPE_DIR: Entry is a directory + * + * They correspond to `arrow::fs::FileType` values. 
+ * + * Since: 1.0.0 + */ +typedef enum { + GARROW_FILE_TYPE_NOT_FOUND, + GARROW_FILE_TYPE_UNKNOWN, + GARROW_FILE_TYPE_FILE, + GARROW_FILE_TYPE_DIR +} GArrowFileType; + + +/* arrow::fs::FileInfo */ + +#define GARROW_TYPE_FILE_INFO (garrow_file_info_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFileInfo, + garrow_file_info, + GARROW, + FILE_INFO, + GObject) +struct _GArrowFileInfoClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowFileInfo *garrow_file_info_new(void); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_info_equal(GArrowFileInfo *file_info, + GArrowFileInfo *other_file_info); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_info_is_file(GArrowFileInfo *file_info); +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_info_is_dir(GArrowFileInfo *file_info); +GARROW_AVAILABLE_IN_0_17 +gchar *garrow_file_info_to_string(GArrowFileInfo *file_info); + +/* arrow::fs::FileSelector */ + +#define GARROW_TYPE_FILE_SELECTOR (garrow_file_selector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFileSelector, + garrow_file_selector, + GARROW, + FILE_SELECTOR, + GObject) +struct _GArrowFileSelectorClass +{ + GObjectClass parent_class; +}; + +/* arrow::fs::FileSystem */ + +#define GARROW_TYPE_FILE_SYSTEM (garrow_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFileSystem, + garrow_file_system, + GARROW, + FILE_SYSTEM, + GObject) +struct _GArrowFileSystemClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_3_0 +GArrowFileSystem * +garrow_file_system_create(const gchar *uri, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gchar *garrow_file_system_get_type_name(GArrowFileSystem *file_system); + +GARROW_AVAILABLE_IN_0_17 +GArrowFileInfo * +garrow_file_system_get_file_info(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GList *garrow_file_system_get_file_infos_paths(GArrowFileSystem *file_system, + const gchar **paths, + gsize n_paths, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GList 
* +garrow_file_system_get_file_infos_selector(GArrowFileSystem *file_system, + GArrowFileSelector *file_selector, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_create_dir(GArrowFileSystem *file_system, + const gchar *path, + gboolean recursive, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_delete_dir(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_delete_dir_contents(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_delete_file(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_delete_files(GArrowFileSystem *file_system, + const gchar **paths, + gsize n_paths, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_move(GArrowFileSystem *file_system, + const gchar *src, + const gchar *dest, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean garrow_file_system_copy_file(GArrowFileSystem *file_system, + const gchar *src, + const gchar *dest, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GArrowInputStream * +garrow_file_system_open_input_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GArrowSeekableInputStream * +garrow_file_system_open_input_file(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GArrowOutputStream * +garrow_file_system_open_output_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +GArrowOutputStream * +garrow_file_system_open_append_stream(GArrowFileSystem *file_system, + const gchar *path, + GError **error); + +/* arrow::fs::SubTreeFileSystem */ + +#define GARROW_TYPE_SUB_TREE_FILE_SYSTEM (garrow_sub_tree_file_system_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowSubTreeFileSystem, + garrow_sub_tree_file_system, + GARROW, + SUB_TREE_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowSubTreeFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowSubTreeFileSystem * +garrow_sub_tree_file_system_new(const gchar *base_path, + GArrowFileSystem *base_file_system); + +/* arrow::fs::SlowFileSystem */ + +#define GARROW_TYPE_SLOW_FILE_SYSTEM (garrow_slow_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSlowFileSystem, + garrow_slow_file_system, + GARROW, + SLOW_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowSlowFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + +/* TODO: GArrowLatencyGenerator +GARROW_AVAILABLE_IN_0_17 +GArrowSlowFileSystem * +garrow_slow_file_system_new(GArrowFileSystem *base_file_system, + GArrowLatencyGenerator *latencies); +*/ + +GARROW_AVAILABLE_IN_0_17 +GArrowSlowFileSystem * +garrow_slow_file_system_new_average_latency(GArrowFileSystem *base_file_system, + gdouble average_latency); + +GARROW_AVAILABLE_IN_0_17 +GArrowSlowFileSystem * +garrow_slow_file_system_new_average_latency_and_seed(GArrowFileSystem *base_file_system, + gdouble average_latency, + gint32 seed); + + + +#define GARROW_TYPE_MOCK_FILE_SYSTEM (garrow_mock_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMockFileSystem, + garrow_mock_file_system, + GARROW, + MOCK_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowMockFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + + +#define GARROW_TYPE_HDFS_FILE_SYSTEM (garrow_hdfs_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowHDFSFileSystem, + garrow_hdfs_file_system, + GARROW, + HDFS_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowHDFSFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + + +#define GARROW_TYPE_S3_FILE_SYSTEM (garrow_s3_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowS3FileSystem, + garrow_s3_file_system, + GARROW, + S3_FILE_SYSTEM, + GArrowFileSystem) +struct 
_GArrowS3FileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/file-system.hpp b/src/arrow/c_glib/arrow-glib/file-system.hpp new file mode 100644 index 000000000..6130d2df5 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file-system.hpp @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/filesystem/api.h> + +#include <arrow-glib/file-system.h> + +GArrowFileInfo * +garrow_file_info_new_raw(const arrow::fs::FileInfo &arrow_file_info); + +arrow::fs::FileInfo * +garrow_file_info_get_raw(GArrowFileInfo *file_info); + +GArrowFileSystem * +garrow_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system); + +std::shared_ptr<arrow::fs::FileSystem> +garrow_file_system_get_raw(GArrowFileSystem *file_system); + +GArrowSubTreeFileSystem * +garrow_sub_tree_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system, + GArrowFileSystem *base_file_system); + +GArrowSlowFileSystem * +garrow_slow_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system, + GArrowFileSystem *base_file_system); + diff --git a/src/arrow/c_glib/arrow-glib/file.cpp b/src/arrow/c_glib/arrow-glib/file.cpp new file mode 100644 index 000000000..422336b90 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file.cpp @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow/api.h> + +#include <arrow-glib/error.hpp> +#include <arrow-glib/file.hpp> +#include <arrow-glib/file-mode.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: file + * @title: GArrowFile + * @short_description: File interface + * + * #GArrowFile is an interface for file. + */ + +G_DEFINE_INTERFACE(GArrowFile, + garrow_file, + G_TYPE_OBJECT) + +static void +garrow_file_default_init(GArrowFileInterface *iface) +{ +} + +/** + * garrow_file_close: + * @file: A #GArrowFile. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + */ +gboolean +garrow_file_close(GArrowFile *file, + GError **error) +{ + auto arrow_file = garrow_file_get_raw(file); + + auto status = arrow_file->Close(); + return garrow_error_check(error, status, "[io][file][close]"); +} + +/** + * garrow_file_is_closed: + * @file: A #GArrowFile. + * + * Returns: %TRUE if the @file is already closed, %FALSE otherwise. + * + * Since: 0.13.0 + */ +gboolean +garrow_file_is_closed(GArrowFile *file) +{ + auto arrow_file = garrow_file_get_raw(file); + return arrow_file->closed(); +} + +/** + * garrow_file_tell: + * @file: A #GArrowFile. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The current offset on success, -1 if there was an error. + */ +gint64 +garrow_file_tell(GArrowFile *file, + GError **error) +{ + auto arrow_file = garrow_file_get_raw(file); + + const auto position = arrow_file->Tell(); + if (garrow::check(error, position, "[io][file][tell]")) { + return position.ValueOrDie(); + } else { + return -1; + } +} + +/** + * garrow_file_get_mode: + * @file: A #GArrowFile. + * + * Returns: The mode of the file. 
+ */ +GArrowFileMode +garrow_file_get_mode(GArrowFile *file) +{ + auto arrow_file = garrow_file_get_raw(file); + + auto arrow_mode = arrow_file->mode(); + return garrow_file_mode_from_raw(arrow_mode); +} + +G_END_DECLS + +std::shared_ptr<arrow::io::FileInterface> +garrow_file_get_raw(GArrowFile *file) +{ + auto *iface = GARROW_FILE_GET_IFACE(file); + return iface->get_raw(file); +} diff --git a/src/arrow/c_glib/arrow-glib/file.h b/src/arrow/c_glib/arrow-glib/file.h new file mode 100644 index 000000000..45319b85c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/file-mode.h> +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_FILE (garrow_file_get_type()) +G_DECLARE_INTERFACE(GArrowFile, + garrow_file, + GARROW, + FILE, + GObject) + +gboolean garrow_file_close(GArrowFile *file, + GError **error); +GARROW_AVAILABLE_IN_0_13 +gboolean garrow_file_is_closed(GArrowFile *file); +gint64 garrow_file_tell(GArrowFile *file, + GError **error); +GArrowFileMode garrow_file_get_mode(GArrowFile *file); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/file.hpp b/src/arrow/c_glib/arrow-glib/file.hpp new file mode 100644 index 000000000..c4cc78747 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/file.hpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/interfaces.h> + +#include <arrow-glib/file.h> + +/** + * GArrowFileInterface: + * + * It wraps `arrow::io::FileInterface`. 
+ */ +struct _GArrowFileInterface +{ + GTypeInterface parent_iface; + + std::shared_ptr<arrow::io::FileInterface> (*get_raw)(GArrowFile *file); +}; + +std::shared_ptr<arrow::io::FileInterface> garrow_file_get_raw(GArrowFile *file); diff --git a/src/arrow/c_glib/arrow-glib/gobject-type.h b/src/arrow/c_glib/arrow-glib/gobject-type.h new file mode 100644 index 000000000..c9ac9ea81 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/gobject-type.h @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <glib-object.h> + +#ifndef G_DECLARE_DERIVABLE_TYPE /* fallback used only when GLib does not define this macro */ +# define G_DECLARE_DERIVABLE_TYPE(ObjectName, \ + object_name, \ + MODULE_NAME, \ + OBJECT_NAME, \ + ParentName) \ + typedef struct _ ## ObjectName ObjectName; \ + typedef struct _ ## ObjectName ## Class ObjectName ## Class; \ + \ + struct _ ## ObjectName \ + { \ + ParentName parent_instance; \ + }; \ + \ + GType object_name ## _get_type(void) G_GNUC_CONST; \ + \ + static inline ObjectName * \ + MODULE_NAME ## _ ## OBJECT_NAME(gpointer object) \ + { \ + return G_TYPE_CHECK_INSTANCE_CAST(object, \ + object_name ## _get_type(), \ + ObjectName); \ + } \ + \ + static inline ObjectName ## Class * \ + MODULE_NAME ## _ ## OBJECT_NAME ## _CLASS(gpointer klass) \ + { \ + return G_TYPE_CHECK_CLASS_CAST(klass, \ + object_name ## _get_type(), \ + ObjectName ## Class); \ + } \ + \ + static inline gboolean \ + MODULE_NAME ## _IS_ ## OBJECT_NAME(gpointer object) \ + { \ + return G_TYPE_CHECK_INSTANCE_TYPE(object, \ + object_name ## _get_type()); \ + } \ + \ + static inline gboolean \ + MODULE_NAME ## _IS_ ## OBJECT_NAME ## _CLASS(gpointer klass) \ + { \ + return G_TYPE_CHECK_CLASS_TYPE(klass, \ + object_name ## _get_type()); \ + } \ + \ + static inline ObjectName ## Class * \ + MODULE_NAME ## _ ## OBJECT_NAME ## _GET_CLASS(gpointer object) \ + { \ + return G_TYPE_INSTANCE_GET_CLASS(object, \ + object_name ## _get_type(), \ + ObjectName ## Class); \ + } +#endif + +#ifndef G_DECLARE_INTERFACE +# define G_DECLARE_INTERFACE(ModuleObjectName, \ + module_object_name, \ + MODULE_NAME, \ + OBJECT_NAME, \ + PrerequisiteName) \ + typedef struct \ + _ ## ModuleObjectName \ + ModuleObjectName; \ + typedef struct \ + _ ## ModuleObjectName ## Interface \ + ModuleObjectName ## Interface; \ + \ + GType module_object_name ## _get_type(void); \ + \ + static inline ModuleObjectName * \ + MODULE_NAME ## _ ## OBJECT_NAME(gpointer object) \ + { \ + return G_TYPE_CHECK_INSTANCE_CAST(object, \ + module_object_name ## 
_get_type(), \ + ModuleObjectName); \ + } \ + \ + static inline gboolean \ + MODULE_NAME ## _IS_ ## OBJECT_NAME(gpointer object) \ + { \ + return G_TYPE_CHECK_INSTANCE_TYPE( \ + object, module_object_name ## _get_type()); \ + } \ + \ + static inline ModuleObjectName ## Interface * \ + MODULE_NAME ## _ ## OBJECT_NAME ## _GET_IFACE(gpointer object) \ + { \ + return G_TYPE_INSTANCE_GET_INTERFACE(object, \ + module_object_name ## _get_type(), \ + ModuleObjectName ## Interface); \ + } +#endif diff --git a/src/arrow/c_glib/arrow-glib/input-stream.cpp b/src/arrow/c_glib/arrow-glib/input-stream.cpp new file mode 100644 index 000000000..37e4702ff --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/input-stream.cpp @@ -0,0 +1,1328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow/io/interfaces.h> +#include <arrow/io/memory.h> +#include <arrow/ipc/reader.h> +#include <arrow/util/string_view.h> + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/codec.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/file.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/ipc-options.hpp> +#include <arrow-glib/readable.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/tensor.hpp> + +#include <mutex> + +G_BEGIN_DECLS + +/** + * SECTION: input-stream + * @section_id: input-stream-classes + * @title: Input stream classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowInputStream is a base class for input stream. + * + * #GArrowSeekableInputStream is a base class for input stream that + * supports random access. + * + * #GArrowBufferInputStream is a class to read data on buffer. + * + * #GArrowFileInputStream is a class to read data in file. + * + * #GArrowMemoryMappedInputStream is a class to read data in file by + * mapping the file on memory. It supports zero copy. + * + * #GArrowGIOInputStream is a class for `GInputStream` based input + * stream. + * + * #GArrowCompressedInputStream is a class to read data from + * compressed input stream. 
+ */ + +typedef struct GArrowInputStreamPrivate_ { + std::shared_ptr<arrow::io::InputStream> input_stream; +} GArrowInputStreamPrivate; + +enum { + PROP_INPUT_STREAM = 1 +}; + +static std::shared_ptr<arrow::io::FileInterface> +garrow_input_stream_get_raw_file_interface(GArrowFile *file) +{ + auto input_stream = GARROW_INPUT_STREAM(file); + auto arrow_input_stream = + garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + +static void +garrow_input_stream_file_interface_init(GArrowFileInterface *iface) +{ + iface->get_raw = garrow_input_stream_get_raw_file_interface; +} + +static std::shared_ptr<arrow::io::Readable> +garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable) +{ + auto input_stream = GARROW_INPUT_STREAM(readable); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + +static void +garrow_input_stream_readable_interface_init(GArrowReadableInterface *iface) +{ + iface->get_raw = garrow_input_stream_get_raw_readable_interface; +} + +G_DEFINE_TYPE_WITH_CODE(GArrowInputStream, + garrow_input_stream, + G_TYPE_INPUT_STREAM, + G_ADD_PRIVATE(GArrowInputStream) + G_IMPLEMENT_INTERFACE(GARROW_TYPE_FILE, + garrow_input_stream_file_interface_init) + G_IMPLEMENT_INTERFACE(GARROW_TYPE_READABLE, + garrow_input_stream_readable_interface_init)) + +#define GARROW_INPUT_STREAM_GET_PRIVATE(obj) \ + static_cast<GArrowInputStreamPrivate *>( \ + garrow_input_stream_get_instance_private( \ + GARROW_INPUT_STREAM(obj))) + +static void +garrow_input_stream_finalize(GObject *object) +{ + auto priv = GARROW_INPUT_STREAM_GET_PRIVATE(object); + + priv->input_stream.~shared_ptr(); + + G_OBJECT_CLASS(garrow_input_stream_parent_class)->finalize(object); +} + +static void +garrow_input_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INPUT_STREAM: + 
priv->input_stream = + *static_cast<std::shared_ptr<arrow::io::InputStream> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_input_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static gssize +garrow_input_stream_read(GInputStream *stream, + void *buffer, + gsize count, + GCancellable *cancellable, + GError **error) +{ + if (g_cancellable_set_error_if_cancelled(cancellable, error)) { + return -1; + } + auto arrow_input_stream = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(stream)); + auto n_read_bytes = arrow_input_stream->Read(count, buffer); + if (!garrow::check(error, n_read_bytes, "[input-stream][read]")) { + return -1; + } + return n_read_bytes.ValueOrDie(); +} + +static gssize +garrow_input_stream_skip(GInputStream *stream, + gsize count, + GCancellable *cancellable, + GError **error) +{ + if (g_cancellable_set_error_if_cancelled(cancellable, error)) { + return -1; + } + auto arrow_input_stream = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(stream)); + auto status = arrow_input_stream->Advance(count); + if (!garrow_error_check(error, status, "[input-stream][skip]")) { + return -1; + } + return count; +} + +static gboolean +garrow_input_stream_close(GInputStream *stream, + GCancellable *cancellable, + GError **error) +{ + if (g_cancellable_set_error_if_cancelled(cancellable, error)) { + return FALSE; + } + auto arrow_input_stream = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(stream)); + auto status = arrow_input_stream->Close(); + return garrow_error_check(error, status, "[input-stream][close]"); +} + +static void +garrow_input_stream_init(GArrowInputStream *object) +{ + auto priv = GARROW_INPUT_STREAM_GET_PRIVATE(object); + new(&priv->input_stream) 
std::shared_ptr<arrow::io::InputStream>; +} + +static void +garrow_input_stream_class_init(GArrowInputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_input_stream_finalize; + gobject_class->set_property = garrow_input_stream_set_property; + gobject_class->get_property = garrow_input_stream_get_property; + + auto input_stream_class = G_INPUT_STREAM_CLASS(klass); + input_stream_class->read_fn = garrow_input_stream_read; + input_stream_class->skip = garrow_input_stream_skip; + input_stream_class->close_fn = garrow_input_stream_close; + + GParamSpec *spec; + spec = g_param_spec_pointer("input-stream", + "Input stream", + "The raw std::shared<arrow::io::InputStream> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_INPUT_STREAM, spec); +} + +/** + * garrow_input_stream_advance: + * @input_stream: A #GArrowInputStream. + * @n_bytes: The number of bytes to be advanced. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 0.11.0 + */ +gboolean +garrow_input_stream_advance(GArrowInputStream *input_stream, + gint64 n_bytes, + GError **error) +{ + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + auto status = arrow_input_stream->Advance(n_bytes); + return garrow_error_check(error, status, "[input-stream][advance]"); +} + +/** + * garrow_input_stream_align: + * @input_stream: A #GArrowInputStream. + * @alignment: The byte multiple for the metadata prefix, usually 8 + * or 64, to ensure the body starts on a multiple of that alignment. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 0.11.0 + */ +gboolean +garrow_input_stream_align(GArrowInputStream *input_stream, + gint32 alignment, + GError **error) +{ + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + auto status = arrow::ipc::AlignStream(arrow_input_stream.get(), + alignment); + return garrow_error_check(error, status, "[input-stream][align]"); +} + +/** + * garrow_input_stream_read_tensor: + * @input_stream: A #GArrowInputStream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * #GArrowTensor on success, %NULL on error. + * + * Since: 0.11.0 + */ +GArrowTensor * +garrow_input_stream_read_tensor(GArrowInputStream *input_stream, + GError **error) +{ + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + + auto arrow_tensor = arrow::ipc::ReadTensor(arrow_input_stream.get()); + if (garrow::check(error, arrow_tensor, "[input-stream][read-tensor]")) { + return garrow_tensor_new_raw(&(arrow_tensor.ValueOrDie())); + } else { + return NULL; + } +} + +/** + * garrow_input_stream_read_record_batch: + * @input_stream: A #GArrowInputStream. + * @schema: A #GArrowSchema for a read record batch. + * @options: (nullable): A #GArrowReadOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * #GArrowRecordBatch on success, %NULL on error. 
+ * + * Since: 1.0.0 + */ +GArrowRecordBatch * +garrow_input_stream_read_record_batch(GArrowInputStream *input_stream, + GArrowSchema *schema, + GArrowReadOptions *options, + GError **error) +{ + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + auto arrow_schema = garrow_schema_get_raw(schema); + + if (options) { + auto arrow_options = garrow_read_options_get_raw(options); + auto arrow_dictionary_memo = + garrow_read_options_get_dictionary_memo_raw(options); + auto arrow_record_batch = + arrow::ipc::ReadRecordBatch(arrow_schema, + arrow_dictionary_memo, + *arrow_options, + arrow_input_stream.get()); + if (garrow::check(error, + arrow_record_batch, + "[input-stream][read-record-batch]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch)); + } else { + return NULL; + } + } else { + auto arrow_options = arrow::ipc::IpcReadOptions::Defaults(); + auto arrow_record_batch = + arrow::ipc::ReadRecordBatch(arrow_schema, + nullptr, + arrow_options, + arrow_input_stream.get()); + if (garrow::check(error, + arrow_record_batch, + "[input-stream][read-record-batch]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch)); + } else { + return NULL; + } + } +} + + +G_DEFINE_TYPE(GArrowSeekableInputStream, + garrow_seekable_input_stream, + GARROW_TYPE_INPUT_STREAM); + +static void +garrow_seekable_input_stream_init(GArrowSeekableInputStream *object) +{ +} + +static void +garrow_seekable_input_stream_class_init(GArrowSeekableInputStreamClass *klass) +{ +} + +/** + * garrow_seekable_input_stream_get_size: + * @input_stream: A #GArrowSeekableInputStream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The size of the file. 
+ */ +guint64 +garrow_seekable_input_stream_get_size(GArrowSeekableInputStream *input_stream, + GError **error) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + auto size = arrow_random_access_file->GetSize(); + if (garrow::check(error, size, "[seekable-input-stream][get-size]")) { + return size.ValueOrDie(); + } else { + return 0; + } +} + +/** + * garrow_seekable_input_stream_get_support_zero_copy: + * @input_stream: A #GArrowSeekableInputStream. + * + * Returns: Whether zero copy read is supported or not. + */ +gboolean +garrow_seekable_input_stream_get_support_zero_copy(GArrowSeekableInputStream *input_stream) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + return arrow_random_access_file->supports_zero_copy(); +} + +/** + * garrow_seekable_input_stream_read_at: + * @input_stream: A #GArrowSeekableInputStream. + * @position: The read start position. + * @n_bytes: The number of bytes to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): #GArrowBuffer that has read + * data on success, %NULL if there was an error. + */ +GArrowBuffer * +garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream, + gint64 position, + gint64 n_bytes, + GError **error) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + + auto arrow_buffer = arrow_random_access_file->ReadAt(position, n_bytes); + if (garrow::check(error, arrow_buffer, "[seekable-input-stream][read-at]")) { + return garrow_buffer_new_raw(&(arrow_buffer.ValueOrDie())); + } else { + return NULL; + } +} + +/** + * garrow_seekable_input_stream_read_at_bytes: + * @input_stream: A #GArrowSeekableInputStream. + * @position: The read start position. + * @n_bytes: The number of bytes to be read. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full) (nullable): #GBytes that has read data on + * success, %NULL if there was an error. + * + * Since: 0.15.0 + */ +GBytes * +garrow_seekable_input_stream_read_at_bytes(GArrowSeekableInputStream *input_stream, + gint64 position, + gint64 n_bytes, + GError **error) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + + auto arrow_buffer_result = arrow_random_access_file->ReadAt(position, n_bytes); + if (!garrow::check(error, + arrow_buffer_result, + "[seekable-input-stream][read-at][bytes]")) { + return NULL; + } + + auto arrow_cpu_buffer_result = + arrow::Buffer::ViewOrCopy(*arrow_buffer_result, + arrow::default_cpu_memory_manager()); + if (!garrow::check(error, + arrow_cpu_buffer_result, + "[seekable-input-stream][read-at][bytes][view-or-copy]")) { + return NULL; + } + + auto arrow_cpu_buffer = *arrow_cpu_buffer_result; + return g_bytes_new(arrow_cpu_buffer->data(), + arrow_cpu_buffer->size()); +} + + +/** + * garrow_seekable_input_stream_peek: + * @input_stream: A #GArrowSeekableInputStream. + * @n_bytes: The number of bytes to be peeked. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The data of the buffer, up to the + * indicated number. The data becomes invalid after any operation on + * the stream. If the stream is unbuffered, the data is empty. + * + * It should be freed with g_bytes_unref() when no longer needed. 
+ * + * Since: 0.12.0 + */ +GBytes * +garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes, + GError **error) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + + auto view_result = arrow_random_access_file->Peek(n_bytes); + if (garrow::check(error, view_result, "[seekable-input-stream][peek]")) { + auto view = view_result.ValueOrDie(); + return g_bytes_new_static(view.data(), view.size()); + } else { + return NULL; + } +} + + +typedef struct GArrowBufferInputStreamPrivate_ { + GArrowBuffer *buffer; +} GArrowBufferInputStreamPrivate; + +enum { + PROP_BUFFER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowBufferInputStream, + garrow_buffer_input_stream, + GARROW_TYPE_SEEKABLE_INPUT_STREAM); + +#define GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(obj) \ + static_cast<GArrowBufferInputStreamPrivate *>( \ + garrow_buffer_input_stream_get_instance_private( \ + GARROW_BUFFER_INPUT_STREAM(obj))) + +static void +garrow_buffer_input_stream_dispose(GObject *object) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + if (priv->buffer) { + g_object_unref(priv->buffer); + priv->buffer = nullptr; + } + + G_OBJECT_CLASS(garrow_buffer_input_stream_parent_class)->dispose(object); +} + +static void +garrow_buffer_input_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + priv->buffer = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_buffer_input_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + g_value_set_object(value, priv->buffer); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_buffer_input_stream_init(GArrowBufferInputStream *object) +{ +} + +static void +garrow_buffer_input_stream_class_init(GArrowBufferInputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_buffer_input_stream_dispose; + gobject_class->set_property = garrow_buffer_input_stream_set_property; + gobject_class->get_property = garrow_buffer_input_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("buffer", + "Buffer", + "The data", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER, spec); +} + +/** + * garrow_buffer_input_stream_new: + * @buffer: The buffer to be read. + * + * Returns: A newly created #GArrowBufferInputStream. + */ +GArrowBufferInputStream * +garrow_buffer_input_stream_new(GArrowBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(buffer); + auto arrow_buffer_reader = + std::make_shared<arrow::io::BufferReader>(arrow_buffer); + return garrow_buffer_input_stream_new_raw(&arrow_buffer_reader, buffer); +} + +/** + * garrow_buffer_input_stream_get_buffer: + * @input_stream: A #GArrowBufferInputStream. + * + * Returns: (transfer full): The data of the stream as #GArrowBuffer. 
+ */ +GArrowBuffer * +garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(input_stream); + if (priv->buffer) { + g_object_ref(priv->buffer); + return priv->buffer; + } + + auto arrow_buffer_reader = garrow_buffer_input_stream_get_raw(input_stream); + auto arrow_buffer = arrow_buffer_reader->buffer(); + return garrow_buffer_new_raw(&arrow_buffer); +} + + +G_DEFINE_TYPE(GArrowFileInputStream, + garrow_file_input_stream, + GARROW_TYPE_SEEKABLE_INPUT_STREAM); + +static void +garrow_file_input_stream_init(GArrowFileInputStream *object) +{ +} + +static void +garrow_file_input_stream_class_init(GArrowFileInputStreamClass *klass) +{ +} + +/** + * garrow_file_input_stream_new: + * @path: The path of the file to be opened. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowFileInputStream + * or %NULL on error. + * + * Since: 6.0.0 + */ +GArrowFileInputStream * +garrow_file_input_stream_new(const gchar *path, + GError **error) +{ + auto arrow_stream_result = arrow::io::ReadableFile::Open(path); + if (garrow::check(error, arrow_stream_result, "[file-input-stream][new]")) { + auto arrow_stream = *arrow_stream_result; + return garrow_file_input_stream_new_raw(&arrow_stream); + } else { + return NULL; + } +} + +/** + * garrow_file_input_stream_new_file_descriptor: + * @file_descriptor: The file descriptor of this input stream. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowFileInputStream + * or %NULL on error. 
+ * + * Since: 6.0.0 + */ +GArrowFileInputStream * +garrow_file_input_stream_new_file_descriptor(gint file_descriptor, + GError **error) +{ + auto arrow_stream_result = arrow::io::ReadableFile::Open(file_descriptor); + if (garrow::check(error, + arrow_stream_result, + "[file-input-stream][new-file-descriptor]")) { + auto arrow_stream = *arrow_stream_result; + return garrow_file_input_stream_new_raw(&arrow_stream); + } else { + return NULL; + } +} + +/** + * garrow_file_input_stream_get_file_descriptor: + * @stream: A #GArrowFileInuptStream. + * + * Returns: The file descriptor of @stream. + * + * Since: 6.0.0 + */ +gint +garrow_file_input_stream_get_file_descriptor(GArrowFileInputStream *stream) +{ + auto arrow_stream = + std::static_pointer_cast<arrow::io::ReadableFile>( + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(stream))); + return arrow_stream->file_descriptor(); +} + + +G_DEFINE_TYPE(GArrowMemoryMappedInputStream, + garrow_memory_mapped_input_stream, + GARROW_TYPE_SEEKABLE_INPUT_STREAM); + +static void +garrow_memory_mapped_input_stream_init(GArrowMemoryMappedInputStream *object) +{ +} + +static void +garrow_memory_mapped_input_stream_class_init(GArrowMemoryMappedInputStreamClass *klass) +{ +} + +/** + * garrow_memory_mapped_input_stream_new: + * @path: The path of the file to be mapped on memory. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowMemoryMappedInputStream + * or %NULL on error. 
+ */ +GArrowMemoryMappedInputStream * +garrow_memory_mapped_input_stream_new(const gchar *path, + GError **error) +{ + auto arrow_stream_result = + arrow::io::MemoryMappedFile::Open(path, arrow::io::FileMode::READ); + if (garrow::check(error, + arrow_stream_result, + "[memory-mapped-input-stream][new]")) { + auto arrow_stream = *arrow_stream_result; + return garrow_memory_mapped_input_stream_new_raw(&arrow_stream); + } else { + return NULL; + } +} + + +G_END_DECLS + +namespace garrow { + class GIOInputStream : public arrow::io::RandomAccessFile { + public: + GIOInputStream(GInputStream *input_stream) : + input_stream_(input_stream), + lock_() { + g_object_ref(input_stream_); + } + + ~GIOInputStream() { + g_object_unref(input_stream_); + } + + GInputStream *get_input_stream() { + return input_stream_; + } + + bool closed() const override { + return static_cast<bool>(g_input_stream_is_closed(input_stream_)); + } + + arrow::Status Close() override { + std::lock_guard<std::mutex> guard(lock_); + GError *error = NULL; + if (g_input_stream_close(input_stream_, NULL, &error)) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][close]"); + } + } + + arrow::Result<int64_t> Tell() const override { + if (!G_IS_SEEKABLE(input_stream_)) { + std::string message("[gio-input-stream][tell] " + "not seekable input stream: <"); + message += G_OBJECT_CLASS_NAME(G_OBJECT_GET_CLASS(input_stream_)); + message += ">"; + return arrow::Status::NotImplemented(message); + } + + return g_seekable_tell(G_SEEKABLE(input_stream_)); + } + + arrow::Result<int64_t> Read(int64_t n_bytes, void *out) override { + std::lock_guard<std::mutex> guard(lock_); + GError *error = NULL; + auto n_read_bytes = g_input_stream_read(input_stream_, + out, + n_bytes, + NULL, + &error); + if (n_read_bytes == -1) { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][read]"); + } else { + return 
n_read_bytes; + } + } + + arrow::Result<int64_t> ReadAt(int64_t position, + int64_t n_bytes, + void* out) override { + return arrow::io::RandomAccessFile::ReadAt(position, n_bytes, out); + } + + arrow::Result<std::shared_ptr<arrow::Buffer>> + ReadAt(int64_t position, int64_t n_bytes) override { + return arrow::io::RandomAccessFile::ReadAt(position, n_bytes); + } + + arrow::Result<std::shared_ptr<arrow::Buffer>> + Read(int64_t n_bytes) override { + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(n_bytes)); + + std::lock_guard<std::mutex> guard(lock_); + GError *error = NULL; + auto n_read_bytes = g_input_stream_read(input_stream_, + buffer->mutable_data(), + n_bytes, + NULL, + &error); + if (n_read_bytes == -1) { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][read][buffer]"); + } else { + if (n_read_bytes < n_bytes) { + RETURN_NOT_OK(buffer->Resize(n_read_bytes)); + } + return std::move(buffer); + } + } + + arrow::Result<arrow::util::string_view> Peek(int64_t nbytes) override { + if (!G_IS_BUFFERED_INPUT_STREAM(input_stream_)) { + std::string message("[gio-input-stream][peek] " + "not peekable input stream: <"); + message += G_OBJECT_CLASS_NAME(G_OBJECT_GET_CLASS(input_stream_)); + message += ">"; + return arrow::Status::NotImplemented(message); + } + + auto stream = G_BUFFERED_INPUT_STREAM(input_stream_); + auto available_n_bytes = g_buffered_input_stream_get_available(stream); + if (available_n_bytes < static_cast<gsize>(nbytes)) { + GError *error = NULL; + auto filled_size = + g_buffered_input_stream_fill(stream, nbytes, NULL, &error); + if (filled_size == -1) { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][peek] " + "failed to fill"); + } + } + gsize data_size; + auto data = g_buffered_input_stream_peek_buffer(stream, &data_size); + if (data_size > static_cast<gsize>(nbytes)) { + data_size = nbytes; + } + return arrow::util::string_view(static_cast<const 
char *>(data), + data_size); + } + + arrow::Status Seek(int64_t position) override { + if (!G_IS_SEEKABLE(input_stream_)) { + std::string message("[gio-input-stream][seek] " + "not seekable input stream: <"); + message += G_OBJECT_CLASS_NAME(G_OBJECT_GET_CLASS(input_stream_)); + message += ">"; + return arrow::Status::NotImplemented(message); + } + + std::lock_guard<std::mutex> guard(lock_); + GError *error = NULL; + if (g_seekable_seek(G_SEEKABLE(input_stream_), + position, + G_SEEK_SET, + NULL, + &error)) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][seek]"); + } + } + + arrow::Result<int64_t> GetSize() override { + if (!G_IS_SEEKABLE(input_stream_)) { + std::string message("[gio-input-stream][size] " + "not seekable input stream: <"); + message += G_OBJECT_CLASS_NAME(G_OBJECT_GET_CLASS(input_stream_)); + message += ">"; + return arrow::Status::NotImplemented(message); + } + + std::lock_guard<std::mutex> guard(lock_); + auto current_position = g_seekable_tell(G_SEEKABLE(input_stream_)); + GError *error = NULL; + if (!g_seekable_seek(G_SEEKABLE(input_stream_), + 0, + G_SEEK_END, + NULL, + &error)) { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][size][seek]"); + } + auto size = g_seekable_tell(G_SEEKABLE(input_stream_)); + if (!g_seekable_seek(G_SEEKABLE(input_stream_), + current_position, + G_SEEK_SET, + NULL, + &error)) { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-input-stream][size][seek][restore]"); + } + return size; + } + + bool supports_zero_copy() const override { + return false; + } + + private: + GInputStream *input_stream_; + std::mutex lock_; + }; +}; + +G_BEGIN_DECLS + + +typedef struct GArrowGIOInputStreamPrivate_ { + GInputStream *raw; +} GArrowGIOInputStreamPrivate; + +enum { + PROP_GIO_RAW = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowGIOInputStream, + garrow_gio_input_stream, + 
GARROW_TYPE_SEEKABLE_INPUT_STREAM); + +#define GARROW_GIO_INPUT_STREAM_GET_PRIVATE(object) \ + static_cast<GArrowGIOInputStreamPrivate *>( \ + garrow_gio_input_stream_get_instance_private( \ + GARROW_GIO_INPUT_STREAM(object))) + +static void +garrow_gio_input_stream_dispose(GObject *object) +{ + auto priv = GARROW_GIO_INPUT_STREAM_GET_PRIVATE(object); + + if (priv->raw) { + g_object_unref(priv->raw); + priv->raw = nullptr; + } + + G_OBJECT_CLASS(garrow_gio_input_stream_parent_class)->dispose(object); +} + +static void +garrow_gio_input_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_GIO_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_GIO_RAW: + priv->raw = G_INPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_gio_input_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_GIO_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_GIO_RAW: + g_value_set_object(value, priv->raw); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_gio_input_stream_init(GArrowGIOInputStream *object) +{ +} + +static void +garrow_gio_input_stream_class_init(GArrowGIOInputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_gio_input_stream_dispose; + gobject_class->set_property = garrow_gio_input_stream_set_property; + gobject_class->get_property = garrow_gio_input_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("raw", + "Raw", + "The raw GInputStream *", + G_TYPE_INPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_GIO_RAW, spec); +} + +/** + * garrow_gio_input_stream_new: + * 
@gio_input_stream: The stream to be read. + * + * Returns: (transfer full): A newly created #GArrowGIOInputStream. + * + * Since: 0.5.0 + */ +GArrowGIOInputStream * +garrow_gio_input_stream_new(GInputStream *gio_input_stream) +{ + auto arrow_input_stream = + std::make_shared<garrow::GIOInputStream>(gio_input_stream); + auto object = g_object_new(GARROW_TYPE_GIO_INPUT_STREAM, + "input-stream", &arrow_input_stream, + "raw", gio_input_stream, + NULL); + auto input_stream = GARROW_GIO_INPUT_STREAM(object); + return input_stream; +} + +/** + * garrow_gio_input_stream_get_raw: + * @input_stream: A #GArrowGIOInputStream. + * + * Returns: (transfer none): The wrapped #GInputStream. + * + * Since: 0.5.0 + * + * Deprecated: 0.12.0: Use GArrowGIOInputStream::raw property instead. + */ +GInputStream * +garrow_gio_input_stream_get_raw(GArrowGIOInputStream *input_stream) +{ + auto priv = GARROW_GIO_INPUT_STREAM_GET_PRIVATE(input_stream); + return priv->raw; +} + +typedef struct GArrowCompressedInputStreamPrivate_ { + GArrowCodec *codec; + GArrowInputStream *raw; +} GArrowCompressedInputStreamPrivate; + +enum { + PROP_CODEC = 1, + PROP_RAW +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCompressedInputStream, + garrow_compressed_input_stream, + GARROW_TYPE_INPUT_STREAM) + +#define GARROW_COMPRESSED_INPUT_STREAM_GET_PRIVATE(object) \ + static_cast<GArrowCompressedInputStreamPrivate *>( \ + garrow_compressed_input_stream_get_instance_private( \ + GARROW_COMPRESSED_INPUT_STREAM(object))) + +static void +garrow_compressed_input_stream_dispose(GObject *object) +{ + auto priv = GARROW_COMPRESSED_INPUT_STREAM_GET_PRIVATE(object); + + if (priv->codec) { + g_object_unref(priv->codec); + priv->codec = NULL; + } + + if (priv->raw) { + g_object_unref(priv->raw); + priv->raw = NULL; + } + + G_OBJECT_CLASS(garrow_compressed_input_stream_parent_class)->dispose(object); +} + +static void +garrow_compressed_input_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec 
*pspec) +{ + auto priv = GARROW_COMPRESSED_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CODEC: + priv->codec = GARROW_CODEC(g_value_dup_object(value)); + break; + case PROP_RAW: + priv->raw = GARROW_INPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_compressed_input_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_COMPRESSED_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CODEC: + g_value_set_object(value, priv->codec); + break; + case PROP_RAW: + g_value_set_object(value, priv->raw); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_compressed_input_stream_init(GArrowCompressedInputStream *object) +{ +} + +static void +garrow_compressed_input_stream_class_init(GArrowCompressedInputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_compressed_input_stream_dispose; + gobject_class->set_property = garrow_compressed_input_stream_set_property; + gobject_class->get_property = garrow_compressed_input_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("codec", + "Codec", + "The codec for the stream", + GARROW_TYPE_CODEC, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CODEC, spec); + + spec = g_param_spec_object("raw", + "Raw", + "The underlying raw input stream", + GARROW_TYPE_INPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW, spec); +} + +/** + * garrow_compressed_input_stream_new: + * @codec: A #GArrowCodec for compressed data in the @raw. + * @raw: A #GArrowInputStream that contains compressed data. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCompressedInputStream. + * + * Since: 0.12.0 + */ +GArrowCompressedInputStream * +garrow_compressed_input_stream_new(GArrowCodec *codec, + GArrowInputStream *raw, + GError **error) +{ + auto arrow_codec = garrow_codec_get_raw(codec).get(); + auto arrow_raw = garrow_input_stream_get_raw(raw); + auto arrow_stream = + arrow::io::CompressedInputStream::Make(arrow_codec, arrow_raw); + if (garrow::check(error, arrow_stream, "[compressed-input-stream][new]")) { + return garrow_compressed_input_stream_new_raw(&(arrow_stream.ValueOrDie()), + codec, + raw); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowInputStream * +garrow_input_stream_new_raw(std::shared_ptr<arrow::io::InputStream> *arrow_input_stream) +{ + auto input_stream = + GARROW_INPUT_STREAM(g_object_new(GARROW_TYPE_INPUT_STREAM, + "input-stream", arrow_input_stream, + NULL)); + return input_stream; +} + +std::shared_ptr<arrow::io::InputStream> +garrow_input_stream_get_raw(GArrowInputStream *input_stream) +{ + auto priv = GARROW_INPUT_STREAM_GET_PRIVATE(input_stream); + return priv->input_stream; +} + +GArrowSeekableInputStream * +garrow_seekable_input_stream_new_raw( + std::shared_ptr<arrow::io::RandomAccessFile> *arrow_random_access_file) +{ + auto object = g_object_new(GARROW_TYPE_SEEKABLE_INPUT_STREAM, + "input-stream", arrow_random_access_file, + NULL); + return GARROW_SEEKABLE_INPUT_STREAM(object); +} + +std::shared_ptr<arrow::io::RandomAccessFile> +garrow_seekable_input_stream_get_raw( + GArrowSeekableInputStream *seekable_input_stream) +{ + auto arrow_input_stream = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(seekable_input_stream)); + auto arrow_random_access_file = + std::static_pointer_cast<arrow::io::RandomAccessFile>(arrow_input_stream); + return arrow_random_access_file; +} + +GArrowBufferInputStream * +garrow_buffer_input_stream_new_raw(std::shared_ptr<arrow::io::BufferReader> 
*arrow_buffer_reader, + GArrowBuffer *buffer) +{ + auto buffer_input_stream = + GARROW_BUFFER_INPUT_STREAM(g_object_new(GARROW_TYPE_BUFFER_INPUT_STREAM, + "input-stream", arrow_buffer_reader, + "buffer", buffer, + NULL)); + return buffer_input_stream; +} + +std::shared_ptr<arrow::io::BufferReader> +garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *buffer_input_stream) +{ + auto arrow_input_stream = + garrow_input_stream_get_raw(GARROW_INPUT_STREAM(buffer_input_stream)); + auto arrow_buffer_reader = + std::static_pointer_cast<arrow::io::BufferReader>(arrow_input_stream); + return arrow_buffer_reader; +} + + +GArrowFileInputStream * +garrow_file_input_stream_new_raw( + std::shared_ptr<arrow::io::ReadableFile> *arrow_stream) +{ + return GARROW_FILE_INPUT_STREAM(g_object_new(GARROW_TYPE_FILE_INPUT_STREAM, + "input-stream", arrow_stream, + NULL)); +} + + +GArrowMemoryMappedInputStream * +garrow_memory_mapped_input_stream_new_raw( + std::shared_ptr<arrow::io::MemoryMappedFile> *arrow_stream) +{ + return GARROW_MEMORY_MAPPED_INPUT_STREAM( + g_object_new(GARROW_TYPE_MEMORY_MAPPED_INPUT_STREAM, + "input-stream", arrow_stream, + NULL)); +} + + +GArrowCompressedInputStream * +garrow_compressed_input_stream_new_raw(std::shared_ptr<arrow::io::CompressedInputStream> *arrow_raw, + GArrowCodec *codec, + GArrowInputStream *raw) +{ + auto compressed_input_stream = + g_object_new(GARROW_TYPE_COMPRESSED_INPUT_STREAM, + "input-stream", arrow_raw, + "codec", codec, + "raw", raw, + NULL); + return GARROW_COMPRESSED_INPUT_STREAM(compressed_input_stream); +} + +std::shared_ptr<arrow::io::InputStream> +garrow_compressed_input_stream_get_raw(GArrowCompressedInputStream *compressed_input_stream) +{ + auto input_stream = GARROW_INPUT_STREAM(compressed_input_stream); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + auto arrow_compressed_input_stream = + std::static_pointer_cast<arrow::io::CompressedInputStream>(arrow_input_stream); + return 
arrow_compressed_input_stream->raw(); +} diff --git a/src/arrow/c_glib/arrow-glib/input-stream.h b/src/arrow/c_glib/arrow-glib/input-stream.h new file mode 100644 index 000000000..5f583c804 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/input-stream.h @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <gio/gio.h> + +#include <arrow-glib/buffer.h> +#include <arrow-glib/codec.h> +#include <arrow-glib/ipc-options.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/tensor.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_INPUT_STREAM (garrow_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInputStream, + garrow_input_stream, + GARROW, + INPUT_STREAM, + GInputStream) +struct _GArrowInputStreamClass +{ + GInputStreamClass parent_class; +}; + +gboolean garrow_input_stream_advance(GArrowInputStream *input_stream, + gint64 n_bytes, + GError **error); +gboolean garrow_input_stream_align(GArrowInputStream *input_stream, + gint32 alignment, + GError **error); +GArrowTensor *garrow_input_stream_read_tensor(GArrowInputStream *input_stream, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowRecordBatch * +garrow_input_stream_read_record_batch(GArrowInputStream *input_stream, + GArrowSchema *schema, + GArrowReadOptions *options, + GError **error); + +#define GARROW_TYPE_SEEKABLE_INPUT_STREAM \ + (garrow_seekable_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSeekableInputStream, + garrow_seekable_input_stream, + GARROW, + SEEKABLE_INPUT_STREAM, + GArrowInputStream) +struct _GArrowSeekableInputStreamClass +{ + GArrowInputStreamClass parent_class; +}; + +guint64 garrow_seekable_input_stream_get_size(GArrowSeekableInputStream *input_stream, + GError **error); +gboolean garrow_seekable_input_stream_get_support_zero_copy(GArrowSeekableInputStream *input_stream); +GArrowBuffer * +garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream, + gint64 position, + gint64 n_bytes, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GBytes * +garrow_seekable_input_stream_read_at_bytes(GArrowSeekableInputStream *input_stream, + gint64 position, + gint64 n_bytes, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GBytes *garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes, + GError **error); 
+ + +#define GARROW_TYPE_BUFFER_INPUT_STREAM \ + (garrow_buffer_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBufferInputStream, + garrow_buffer_input_stream, + GARROW, + BUFFER_INPUT_STREAM, + GArrowSeekableInputStream) +struct _GArrowBufferInputStreamClass +{ + GArrowSeekableInputStreamClass parent_class; +}; + +GArrowBufferInputStream *garrow_buffer_input_stream_new(GArrowBuffer *buffer); + +GArrowBuffer *garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream); + + +#define GARROW_TYPE_FILE_INPUT_STREAM (garrow_file_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFileInputStream, + garrow_file_input_stream, + GARROW, + FILE_INPUT_STREAM, + GArrowSeekableInputStream) +struct _GArrowFileInputStreamClass +{ + GArrowSeekableInputStreamClass parent_class; +}; + +GArrowFileInputStream * +garrow_file_input_stream_new(const gchar *path, + GError **error); +GArrowFileInputStream * +garrow_file_input_stream_new_file_descriptor(gint file_descriptor, + GError **error); +gint +garrow_file_input_stream_get_file_descriptor(GArrowFileInputStream *stream); + + +#define GARROW_TYPE_MEMORY_MAPPED_INPUT_STREAM \ + (garrow_memory_mapped_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMemoryMappedInputStream, + garrow_memory_mapped_input_stream, + GARROW, + MEMORY_MAPPED_INPUT_STREAM, + GArrowSeekableInputStream) +struct _GArrowMemoryMappedInputStreamClass +{ + GArrowSeekableInputStreamClass parent_class; +}; + +GArrowMemoryMappedInputStream * +garrow_memory_mapped_input_stream_new(const gchar *path, + GError **error); + + +#define GARROW_TYPE_GIO_INPUT_STREAM \ + (garrow_gio_input_stream_get_type()) +#define GARROW_GIO_INPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_GIO_INPUT_STREAM, \ + GArrowGIOInputStream)) +#define GARROW_GIO_INPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_GIO_INPUT_STREAM, \ + GArrowGIOInputStreamClass)) +#define GARROW_IS_GIO_INPUT_STREAM(obj) \ + 
(G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_GIO_INPUT_STREAM)) +#define GARROW_IS_GIO_INPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_GIO_INPUT_STREAM)) +#define GARROW_GIO_INPUT_STREAM_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_GIO_INPUT_STREAM, \ + GArrowGIOInputStreamClass)) + +typedef struct _GArrowGIOInputStream GArrowGIOInputStream; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowGIOInputStreamClass GArrowGIOInputStreamClass; +#endif + +/** + * GArrowGIOInputStream: + * + * It's an input stream for `GInputStream`. + */ +struct _GArrowGIOInputStream +{ + /*< private >*/ + GArrowSeekableInputStream parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowGIOInputStreamClass +{ + GArrowSeekableInputStreamClass parent_class; +}; +#endif + +GType garrow_gio_input_stream_get_type(void) G_GNUC_CONST; + +GArrowGIOInputStream *garrow_gio_input_stream_new(GInputStream *gio_input_stream); +#ifndef GARROW_DISABLE_DEPRECATED +G_GNUC_DEPRECATED +GInputStream * +garrow_gio_input_stream_get_raw(GArrowGIOInputStream *input_stream); +#endif + +#define GARROW_TYPE_COMPRESSED_INPUT_STREAM \ + (garrow_compressed_input_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCompressedInputStream, + garrow_compressed_input_stream, + GARROW, + COMPRESSED_INPUT_STREAM, + GArrowInputStream) +struct _GArrowCompressedInputStreamClass +{ + GArrowInputStreamClass parent_class; +}; + +GArrowCompressedInputStream * +garrow_compressed_input_stream_new(GArrowCodec *codec, + GArrowInputStream *raw, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/input-stream.hpp b/src/arrow/c_glib/arrow-glib/input-stream.hpp new file mode 100644 index 000000000..2a0a3d3dd --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/input-stream.hpp @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/compressed.h> +#include <arrow/io/file.h> +#include <arrow/io/interfaces.h> +#include <arrow/io/memory.h> + +#include <arrow-glib/input-stream.h> + +GArrowInputStream *garrow_input_stream_new_raw(std::shared_ptr<arrow::io::InputStream> *arrow_input_stream); +std::shared_ptr<arrow::io::InputStream> garrow_input_stream_get_raw(GArrowInputStream *input_stream); + +GArrowSeekableInputStream * +garrow_seekable_input_stream_new_raw( + std::shared_ptr<arrow::io::RandomAccessFile> *arrow_random_access_file); +std::shared_ptr<arrow::io::RandomAccessFile> +garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *input_stream); + +GArrowBufferInputStream * +garrow_buffer_input_stream_new_raw(std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader, + GArrowBuffer *buffer); +std::shared_ptr<arrow::io::BufferReader> garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *input_stream); + + +GArrowFileInputStream * +garrow_file_input_stream_new_raw( + std::shared_ptr<arrow::io::ReadableFile> *arrow_stream); + + +GArrowMemoryMappedInputStream * +garrow_memory_mapped_input_stream_new_raw( + std::shared_ptr<arrow::io::MemoryMappedFile> *arrow_stream); + + +GArrowCompressedInputStream * 
+garrow_compressed_input_stream_new_raw(std::shared_ptr<arrow::io::CompressedInputStream> *arrow_raw, + GArrowCodec *codec, + GArrowInputStream *raw); +std::shared_ptr<arrow::io::InputStream> +garrow_compressed_input_stream_get_raw(GArrowCompressedInputStream *stream); diff --git a/src/arrow/c_glib/arrow-glib/internal-hash-table.hpp b/src/arrow/c_glib/arrow-glib/internal-hash-table.hpp new file mode 100644 index 000000000..3def4606c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/internal-hash-table.hpp @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <glib.h> + +#include <arrow/api.h> + +static inline std::shared_ptr<arrow::KeyValueMetadata> +garrow_internal_hash_table_to_metadata(GHashTable *metadata) +{ + auto arrow_metadata = std::make_shared<arrow::KeyValueMetadata>(); + g_hash_table_foreach(metadata, + [](gpointer key, + gpointer value, + gpointer user_data) { + auto arrow_metadata = + static_cast<std::shared_ptr<arrow::KeyValueMetadata> *>(user_data); + (*arrow_metadata)->Append(static_cast<gchar *>(key), + static_cast<gchar *>(value)); + }, + &arrow_metadata); + return arrow_metadata; +} diff --git a/src/arrow/c_glib/arrow-glib/internal-index.hpp b/src/arrow/c_glib/arrow-glib/internal-index.hpp new file mode 100644 index 000000000..e3d709fc0 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/internal-index.hpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <glib.h> + +static inline bool +garrow_internal_index_adjust(gint &i, const gint max) +{ + if (i < 0) { + i += max; + if (i < 0) { + return false; + } + } + if (i >= max) { + return false; + } + return true; +} diff --git a/src/arrow/c_glib/arrow-glib/ipc-options.cpp b/src/arrow/c_glib/arrow-glib/ipc-options.cpp new file mode 100644 index 000000000..86bae4adf --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/ipc-options.cpp @@ -0,0 +1,529 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/codec.hpp> +#include <arrow-glib/enums.h> +#include <arrow-glib/ipc-options.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: ipc-options + * @section_id: ipc-options-classes + * @title: IPC options classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowReadOptions provides options for reading data. + * + * #GArrowWriteOptions provides options for writing data. 
+ */ + +typedef struct GArrowReadOptionsPrivate_ { + arrow::ipc::IpcReadOptions options; + arrow::ipc::DictionaryMemo dictionary_memo; +} GArrowReadOptionsPrivate; + +enum { + PROP_READ_OPTIONS_MAX_RECURSION_DEPTH = 1, + PROP_READ_OPTIONS_USE_THREADS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowReadOptions, + garrow_read_options, + G_TYPE_OBJECT); + +#define GARROW_READ_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GArrowReadOptionsPrivate *>( \ + garrow_read_options_get_instance_private( \ + GARROW_READ_OPTIONS(obj))) + +static void +garrow_read_options_finalize(GObject *object) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(object); + + priv->options.~IpcReadOptions(); + priv->dictionary_memo.~DictionaryMemo(); + + G_OBJECT_CLASS(garrow_read_options_parent_class)->finalize(object); +} + +static void +garrow_read_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READ_OPTIONS_MAX_RECURSION_DEPTH: + priv->options.max_recursion_depth = g_value_get_int(value); + break; + case PROP_READ_OPTIONS_USE_THREADS: + priv->options.use_threads = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_read_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_READ_OPTIONS_MAX_RECURSION_DEPTH: + g_value_set_int(value, priv->options.max_recursion_depth); + break; + case PROP_READ_OPTIONS_USE_THREADS: + g_value_set_boolean(value, priv->options.use_threads); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_read_options_init(GArrowReadOptions *object) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(object); + new(&priv->options) arrow::ipc::IpcReadOptions; + 
priv->options = arrow::ipc::IpcReadOptions::Defaults(); + new(&priv->dictionary_memo) arrow::ipc::DictionaryMemo; +} + +static void +garrow_read_options_class_init(GArrowReadOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_read_options_finalize; + gobject_class->set_property = garrow_read_options_set_property; + gobject_class->get_property = garrow_read_options_get_property; + + auto options = arrow::ipc::IpcReadOptions::Defaults(); + + GParamSpec *spec; + + /** + * GArrowReadOptions:max-recursion-depth: + * + * The maximum permitted schema nesting depth. + * + * Since: 1.0.0 + */ + spec = g_param_spec_int("max-recursion-depth", + "Max recursion depth", + "The maximum permitted schema nesting depth", + 0, + arrow::ipc::kMaxNestingDepth, + options.max_recursion_depth, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_READ_OPTIONS_MAX_RECURSION_DEPTH, + spec); + + /** + * GArrowReadOptions:use-threads: + * + * Whether to use the global CPU thread pool. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("use-threads", + "Use threads", + "Whether to use the global CPU thread pool", + options.use_threads, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_READ_OPTIONS_USE_THREADS, + spec); +} + +/** + * garrow_read_options_new: + * + * Returns: A newly created #GArrowReadOptions. + * + * Since: 1.0.0 + */ +GArrowReadOptions * +garrow_read_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_READ_OPTIONS, NULL); + return GARROW_READ_OPTIONS(options); +} + +/** + * garrow_read_options_get_included_fields: + * @options: A #GArrowReadOptions. + * @n_fields: (out): The number of included fields. + * + * Returns: (array length=n_fields) (transfer full): + * Top-level schema fields to include when deserializing + * RecordBatch. If empty, return all deserialized fields. 
+ * + * It should be freed with g_free() when no longer needed. + * + * Since: 1.0.0 + */ +int * +garrow_read_options_get_included_fields(GArrowReadOptions *options, + gsize *n_fields) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(options); + if (priv->options.included_fields.empty()) { + if (n_fields) { + *n_fields = 0; + } + return NULL; + } + + auto n = priv->options.included_fields.size(); + auto fields = g_new(int, n); + if (n_fields) { + *n_fields = n; + } + for (size_t i = 0; i < n; ++i) { + fields[i] = priv->options.included_fields[i]; + } + return fields; +} + +/** + * garrow_read_options_set_included_fields: + * @options: A #GArrowReadOptions. + * @fields: (array length=n_fields): Top-level schema fields to + * include when deserializing RecordBatch. If empty, return all + * deserialized fields. + * @n_fields: The number of included fields. + * + * Since: 1.0.0 + */ +void +garrow_read_options_set_included_fields(GArrowReadOptions *options, + int *fields, + gsize n_fields) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(options); + + priv->options.included_fields.resize(n_fields); + for (gsize i = 0; i < n_fields; ++i) { + priv->options.included_fields[i] = fields[i]; + } +} + + +typedef struct GArrowWriteOptionsPrivate_ { + arrow::ipc::IpcWriteOptions options; + GArrowCodec *codec; +} GArrowWriteOptionsPrivate; + +enum { + PROP_WRITE_OPTIONS_ALLOW_64BIT = 1, + PROP_WRITE_OPTIONS_MAX_RECURSION_DEPTH, + PROP_WRITE_OPTIONS_ALIGNMENT, + PROP_WRITE_OPTIONS_WRITE_LEGACY_IPC_FORMAT, + PROP_WRITE_OPTIONS_CODEC, + PROP_WRITE_OPTIONS_USE_THREADS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowWriteOptions, + garrow_write_options, + G_TYPE_OBJECT); + +#define GARROW_WRITE_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GArrowWriteOptionsPrivate *>( \ + garrow_write_options_get_instance_private( \ + GARROW_WRITE_OPTIONS(obj))) + +static void +garrow_write_options_dispose(GObject *object) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(object); + + if (priv->codec) { + 
g_object_unref(priv->codec); + priv->codec = NULL; + } + + G_OBJECT_CLASS(garrow_write_options_parent_class)->dispose(object); +} + +static void +garrow_write_options_finalize(GObject *object) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(object); + + priv->options.~IpcWriteOptions(); + + G_OBJECT_CLASS(garrow_write_options_parent_class)->finalize(object); +} + +static void +garrow_write_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_WRITE_OPTIONS_ALLOW_64BIT: + priv->options.allow_64bit = g_value_get_boolean(value); + break; + case PROP_WRITE_OPTIONS_MAX_RECURSION_DEPTH: + priv->options.max_recursion_depth = g_value_get_int(value); + break; + case PROP_WRITE_OPTIONS_ALIGNMENT: + priv->options.alignment = g_value_get_int(value); + break; + case PROP_WRITE_OPTIONS_WRITE_LEGACY_IPC_FORMAT: + priv->options.write_legacy_ipc_format = g_value_get_boolean(value); + break; + case PROP_WRITE_OPTIONS_CODEC: + if (priv->codec) { + g_object_unref(priv->codec); + } + priv->codec = GARROW_CODEC(g_value_dup_object(value)); + priv->options.codec = garrow_codec_get_raw(priv->codec); + break; + case PROP_WRITE_OPTIONS_USE_THREADS: + priv->options.use_threads = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_write_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_WRITE_OPTIONS_ALLOW_64BIT: + g_value_set_boolean(value, priv->options.allow_64bit); + break; + case PROP_WRITE_OPTIONS_MAX_RECURSION_DEPTH: + g_value_set_int(value, priv->options.max_recursion_depth); + break; + case PROP_WRITE_OPTIONS_ALIGNMENT: + g_value_set_int(value, priv->options.alignment); + break; + case 
PROP_WRITE_OPTIONS_WRITE_LEGACY_IPC_FORMAT: + g_value_set_boolean(value, priv->options.write_legacy_ipc_format); + break; + case PROP_WRITE_OPTIONS_CODEC: + g_value_set_object(value, priv->codec); + break; + case PROP_WRITE_OPTIONS_USE_THREADS: + g_value_set_boolean(value, priv->options.use_threads); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_write_options_init(GArrowWriteOptions *object) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(object); + new(&priv->options) arrow::ipc::IpcWriteOptions; + priv->options = arrow::ipc::IpcWriteOptions::Defaults(); + if (priv->options.codec) { + priv->codec = garrow_codec_new_raw(&(priv->options.codec)); + } else { + priv->codec = NULL; + } +} + +static void +garrow_write_options_class_init(GArrowWriteOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_write_options_dispose; + gobject_class->finalize = garrow_write_options_finalize; + gobject_class->set_property = garrow_write_options_set_property; + gobject_class->get_property = garrow_write_options_get_property; + + auto options = arrow::ipc::IpcWriteOptions::Defaults(); + + GParamSpec *spec; + + /** + * GArrowWriteOptions:allow-64bit: + * + * Whether to allow field lengths that don't fit in a signed 32-bit + * int. Some implementations may not be able to parse such streams. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("allow-64bit", + "Allow 64bit", + "Whether to allow signed 64-bit int " + "for field length", + options.allow_64bit, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_ALLOW_64BIT, + spec); + + /** + * GArrowWriteOptions:max-recursion-depth: + * + * The maximum permitted schema nesting depth. 
+ * + * Since: 1.0.0 + */ + spec = g_param_spec_int("max-recursion-depth", + "Max recursion depth", + "The maximum permitted schema nesting depth", + 0, + arrow::ipc::kMaxNestingDepth, + options.max_recursion_depth, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_MAX_RECURSION_DEPTH, + spec); + + /** + * GArrowWriteOptions:alignment: + * + * Write padding after memory buffers to this multiple of + * bytes. Generally 8 or 64. + * + * Since: 1.0.0 + */ + spec = g_param_spec_int("alignment", + "Alignment", + "Write padding " + "after memory buffers to this multiple of bytes", + 0, + G_MAXINT, + options.alignment, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_ALIGNMENT, + spec); + + /** + * GArrowWriteOptions:write-legacy-ipc-format: + * + * Whether to write the pre-0.15.0 encapsulated IPC message format + * consisting of a 4-byte prefix instead of 8 byte. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("write-legacy-ipc-format", + "Write legacy IPC format", + "Whether to write legacy IPC format", + options.write_legacy_ipc_format, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_WRITE_LEGACY_IPC_FORMAT, + spec); + + /** + * GArrowWriteOptions:codec: + * + * Codec to use for compressing and decompressing record batch body + * buffers. This is not part of the Arrow IPC protocol and only for + * internal use (e.g. Feather files). + * + * May only be UNCOMPRESSED, LZ4_FRAME and ZSTD. 
+ * + * Since: 2.0.0 + */ + spec = g_param_spec_object("codec", + "Codec", + "Codec to use for " + "compressing record batch body buffers.", + GARROW_TYPE_CODEC, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_CODEC, + spec); + + /** + * GArrowWriteOptions:use-threads: + * + * Whether to use the global CPU thread pool. + * + * Since: 1.0.0 + */ + spec = g_param_spec_boolean("use-threads", + "Use threads", + "Whether to use the global CPU thread pool", + options.use_threads, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WRITE_OPTIONS_USE_THREADS, + spec); +} + +/** + * garrow_write_options_new: + * + * Returns: A newly created #GArrowWriteOptions. + * + * Since: 1.0.0 + */ +GArrowWriteOptions * +garrow_write_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_WRITE_OPTIONS, NULL); + return GARROW_WRITE_OPTIONS(options); +} + +G_END_DECLS + +arrow::ipc::IpcReadOptions * +garrow_read_options_get_raw(GArrowReadOptions *options) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} + +arrow::ipc::DictionaryMemo * +garrow_read_options_get_dictionary_memo_raw(GArrowReadOptions *options) +{ + auto priv = GARROW_READ_OPTIONS_GET_PRIVATE(options); + return &(priv->dictionary_memo); +} + +arrow::ipc::IpcWriteOptions * +garrow_write_options_get_raw(GArrowWriteOptions *options) +{ + auto priv = GARROW_WRITE_OPTIONS_GET_PRIVATE(options); + return &(priv->options); +} diff --git a/src/arrow/c_glib/arrow-glib/ipc-options.h b/src/arrow/c_glib/arrow-glib/ipc-options.h new file mode 100644 index 000000000..3b1d99e38 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/ipc-options.h @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_READ_OPTIONS (garrow_read_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowReadOptions, + garrow_read_options, + GARROW, + READ_OPTIONS, + GObject) +struct _GArrowReadOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowReadOptions * +garrow_read_options_new(void); +GARROW_AVAILABLE_IN_1_0 +int * +garrow_read_options_get_included_fields(GArrowReadOptions *options, + gsize *n_fields); +GARROW_AVAILABLE_IN_1_0 +void +garrow_read_options_set_included_fields(GArrowReadOptions *options, + int *fields, + gsize n_fields); + +#define GARROW_TYPE_WRITE_OPTIONS (garrow_write_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowWriteOptions, + garrow_write_options, + GARROW, + WRITE_OPTIONS, + GObject) +struct _GArrowWriteOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_1_0 +GArrowWriteOptions * +garrow_write_options_new(void); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/ipc-options.hpp b/src/arrow/c_glib/arrow-glib/ipc-options.hpp new file mode 100644 index 000000000..f57fbd3c1 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/ipc-options.hpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/ipc/api.h> + +#include <arrow-glib/ipc-options.h> + +arrow::ipc::IpcReadOptions * +garrow_read_options_get_raw(GArrowReadOptions *options); +arrow::ipc::DictionaryMemo * +garrow_read_options_get_dictionary_memo_raw(GArrowReadOptions *options); + +arrow::ipc::IpcWriteOptions * +garrow_write_options_get_raw(GArrowWriteOptions *options); diff --git a/src/arrow/c_glib/arrow-glib/local-file-system.cpp b/src/arrow/c_glib/arrow-glib/local-file-system.cpp new file mode 100644 index 000000000..c4b29658e --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/local-file-system.cpp @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/file-system.hpp> +#include <arrow-glib/local-file-system.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: local-file-system + * @section_id: local-file-system-classes + * @title: Local file system classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowLocalFileSystemOptions is a class for specifying options of + * an instance of #GArrowLocalFileSystem. + * + * #GArrowLocalFileSystem is a class for an implementation of a file system + * that accesses files on the local machine. + */ + +typedef struct GArrowLocalFileSystemOptionsPrivate_ { + arrow::fs::LocalFileSystemOptions local_file_system_options; +} GArrowLocalFileSystemOptionsPrivate; + +enum { + PROP_LOCAL_FILE_SYSTEM_OPTIONS_USE_MMAP = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowLocalFileSystemOptions, + garrow_local_file_system_options, + G_TYPE_OBJECT) + +#define GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(obj) \ + static_cast<GArrowLocalFileSystemOptionsPrivate *>( \ + garrow_local_file_system_options_get_instance_private( \ + GARROW_LOCAL_FILE_SYSTEM_OPTIONS(obj))) + +static void +garrow_local_file_system_options_finalize(GObject *object) +{ + auto priv = GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(object); + + priv->local_file_system_options.~LocalFileSystemOptions(); + + G_OBJECT_CLASS(garrow_local_file_system_options_parent_class)->finalize(object); +} + +static void +garrow_local_file_system_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCAL_FILE_SYSTEM_OPTIONS_USE_MMAP: + priv->local_file_system_options.use_mmap = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_local_file_system_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LOCAL_FILE_SYSTEM_OPTIONS_USE_MMAP: + g_value_set_boolean(value, priv->local_file_system_options.use_mmap); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_local_file_system_options_init(GArrowLocalFileSystemOptions *object) +{ + auto priv = GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(object); + new(&priv->local_file_system_options) arrow::fs::LocalFileSystemOptions; +} + +static void +garrow_local_file_system_options_class_init(GArrowLocalFileSystemOptionsClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_local_file_system_options_finalize; + gobject_class->set_property = garrow_local_file_system_options_set_property; + gobject_class->get_property = garrow_local_file_system_options_get_property; + + auto local_file_system_options = arrow::fs::LocalFileSystemOptions::Defaults(); + + /** + * GArrowLocalFileSystemOptions:use-mmap: + * + * Whether open_input_stream and open_input_file return a mmap'ed file, + * or a regular one. 
+ * + * Since: 0.17.0 + */ + spec = g_param_spec_boolean("use-mmap", + "Use mmap", + "Whether to use mmap", + local_file_system_options.use_mmap, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_LOCAL_FILE_SYSTEM_OPTIONS_USE_MMAP, + spec); +} + +/** + * garrow_local_file_system_options_new: + * + * Returns: (transfer full): A newly created #GArrowLocalFileSystemOptions. + * + * Since: 0.17.0 + */ +GArrowLocalFileSystemOptions * +garrow_local_file_system_options_new(void) +{ + return GARROW_LOCAL_FILE_SYSTEM_OPTIONS( + g_object_new(GARROW_TYPE_LOCAL_FILE_SYSTEM_OPTIONS, NULL)); +} + +/* arrow::fs::LocalFileSystem */ + +G_DEFINE_TYPE(GArrowLocalFileSystem, + garrow_local_file_system, + GARROW_TYPE_FILE_SYSTEM) + +static void +garrow_local_file_system_init(GArrowLocalFileSystem *file_system) +{ +} + +static void +garrow_local_file_system_class_init(GArrowLocalFileSystemClass *klass) +{ +} + +/** + * garrow_local_file_system_new: + * @options: (nullable): A #GArrowLocalFileSystemOptions. + * + * Returns: (transfer full): A newly created #GArrowLocalFileSystem. 
+ * + * Since: 0.17.0 + */ +GArrowLocalFileSystem * +garrow_local_file_system_new(GArrowLocalFileSystemOptions *options) +{ + if (options) { + const auto &arrow_options = + garrow_local_file_system_options_get_raw(options); + auto arrow_local_file_system = + std::static_pointer_cast<arrow::fs::FileSystem>( + std::make_shared<arrow::fs::LocalFileSystem>(arrow_options)); + return garrow_local_file_system_new_raw(&arrow_local_file_system); + } else { + auto arrow_local_file_system = + std::static_pointer_cast<arrow::fs::FileSystem>( + std::make_shared<arrow::fs::LocalFileSystem>()); + return garrow_local_file_system_new_raw(&arrow_local_file_system); + } +} + +G_END_DECLS + +arrow::fs::LocalFileSystemOptions & +garrow_local_file_system_options_get_raw(GArrowLocalFileSystemOptions *options) +{ + auto priv = GARROW_LOCAL_FILE_SYSTEM_OPTIONS_GET_PRIVATE(options); + return priv->local_file_system_options; +} + +GArrowLocalFileSystem * +garrow_local_file_system_new_raw(std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system) +{ + return GARROW_LOCAL_FILE_SYSTEM( + g_object_new(GARROW_TYPE_LOCAL_FILE_SYSTEM, + "file-system", arrow_file_system, + NULL)); +} diff --git a/src/arrow/c_glib/arrow-glib/local-file-system.h b/src/arrow/c_glib/arrow-glib/local-file-system.h new file mode 100644 index 000000000..774149796 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/local-file-system.h @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/file-system.h> + +G_BEGIN_DECLS + +/* arrow::fs::LocalFileSystemOptions */ + +#define GARROW_TYPE_LOCAL_FILE_SYSTEM_OPTIONS (garrow_local_file_system_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystemOptions, + garrow_local_file_system_options, + GARROW, + LOCAL_FILE_SYSTEM_OPTIONS, + GObject) +struct _GArrowLocalFileSystemOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowLocalFileSystemOptions * +garrow_local_file_system_options_new(void); + +/* arrow::fs::LocalFileSystem */ + +#define GARROW_TYPE_LOCAL_FILE_SYSTEM (garrow_local_file_system_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystem, + garrow_local_file_system, + GARROW, + LOCAL_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowLocalFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowLocalFileSystem * +garrow_local_file_system_new(GArrowLocalFileSystemOptions *options); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/local-file-system.hpp b/src/arrow/c_glib/arrow-glib/local-file-system.hpp new file mode 100644 index 000000000..ac7a9de0a --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/local-file-system.hpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/filesystem/api.h> + +#include <arrow-glib/local-file-system.h> + +arrow::fs::LocalFileSystemOptions & +garrow_local_file_system_options_get_raw(GArrowLocalFileSystemOptions *options); + +GArrowLocalFileSystem * +garrow_local_file_system_new_raw( + std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system); + diff --git a/src/arrow/c_glib/arrow-glib/meson.build b/src/arrow/c_glib/arrow-glib/meson.build new file mode 100644 index 000000000..9a399c9dd --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/meson.build @@ -0,0 +1,280 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +sources = files( + 'array-builder.cpp', + 'basic-array.cpp', + 'basic-data-type.cpp', + 'buffer.cpp', + 'chunked-array.cpp', + 'codec.cpp', + 'composite-array.cpp', + 'composite-data-type.cpp', + 'datum.cpp', + 'decimal.cpp', + 'error.cpp', + 'expression.cpp', + 'field.cpp', + 'record-batch.cpp', + 'scalar.cpp', + 'schema.cpp', + 'table.cpp', + 'table-builder.cpp', + 'tensor.cpp', + 'type.cpp', +) + +sources += files( + 'file.cpp', + 'file-mode.cpp', + 'input-stream.cpp', + 'output-stream.cpp', + 'readable.cpp', + 'writable.cpp', + 'writable-file.cpp', +) + +sources += files( + 'ipc-options.cpp', + 'metadata-version.cpp', + 'reader.cpp', + 'writer.cpp', +) + +sources += files( + 'compute.cpp', +) + +sources += files( + 'file-system.cpp', + 'local-file-system.cpp', +) + +if have_arrow_orc + sources += files( + 'orc-file-reader.cpp', + ) +endif + +c_headers = files( + 'array.h', + 'array-builder.h', + 'arrow-glib.h', + 'basic-array.h', + 'basic-data-type.h', + 'buffer.h', + 'chunked-array.h', + 'codec.h', + 'composite-array.h', + 'composite-data-type.h', + 'data-type.h', + 'datum.h', + 'decimal.h', + 'error.h', + 'expression.h', + 'field.h', + 'gobject-type.h', + 'record-batch.h', + 'scalar.h', + 'schema.h', + 'table.h', + 'table-builder.h', + 'tensor.h', + 'type.h', +) + + +c_headers += files( + 'file.h', + 'file-mode.h', + 'input-stream.h', + 'output-stream.h', + 'readable.h', + 'writable.h', + 'writable-file.h', +) + +c_headers += files( + 'ipc-options.h', + 'metadata-version.h', + 'reader.h', + 'writer.h', +) + +c_headers += files( + 'compute.h', +) + +c_headers += files( + 'file-system.h', + 'local-file-system.h', +) + +if have_arrow_orc + c_headers += files( + 'orc-file-reader.h', + ) +endif + + +cpp_headers = files( + 'array.hpp', + 'array-builder.hpp', + 'arrow-glib.hpp', + 'basic-array.hpp', + 'basic-data-type.hpp', + 'buffer.hpp', + 'chunked-array.hpp', + 'codec.hpp', + 'data-type.hpp', + 'datum.hpp', + 'decimal.hpp', + 'error.hpp', + 'expression.hpp', + 
'field.hpp', + 'record-batch.hpp', + 'scalar.hpp', + 'schema.hpp', + 'table.hpp', + 'table-builder.hpp', + 'tensor.hpp', + 'type.hpp', +) + +cpp_headers += files( + 'file.hpp', + 'file-mode.hpp', + 'input-stream.hpp', + 'output-stream.hpp', + 'readable.hpp', + 'writable.hpp', + 'writable-file.hpp', +) + +cpp_headers += files( + 'ipc-options.hpp', + 'metadata-version.hpp', + 'reader.hpp', + 'writer.hpp', +) + +cpp_headers += files( + 'compute.hpp', +) + +cpp_headers += files( + 'file-system.hpp', + 'local-file-system.hpp', +) + +if have_arrow_orc + cpp_headers += files( + 'orc-file-reader.hpp', + ) +endif + +cpp_internal_headers = files( + 'internal-hash-table.hpp', + 'internal-index.hpp', +) + +version_h_conf = configuration_data() +version_h_conf.set('GARROW_VERSION_MAJOR', version_major) +version_h_conf.set('GARROW_VERSION_MINOR', version_minor) +version_h_conf.set('GARROW_VERSION_MICRO', version_micro) +version_h_conf.set('GARROW_VERSION_TAG', version_tag) +version_h = configure_file(input: 'version.h.in', + output: 'version.h', + configuration: version_h_conf) +c_headers += version_h + +enums = gnome.mkenums('enums', + sources: c_headers, + identifier_prefix: 'GArrow', + symbol_prefix: 'garrow', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, meson.project_name()), + install_header: true) +enums_source = enums[0] +enums_header = enums[1] + + +headers = c_headers + cpp_headers +install_headers(headers, subdir: meson.project_name()) + + +gobject = dependency('gobject-2.0') +gobject_libdir = gobject.get_variable(pkgconfig: 'libdir') +# This is for Homebrew. "pkg-config --cflags gio-2.0" includes the +# "-I$(xcrun --show-sdk-path)/usr/include" flag by zlib.pc. The +# include path includes the standard C headers such as stdlib.h. It +# confuses clang++ (/usr/bin/c++). 
+gio = cxx.find_library('gio-2.0', dirs: [gobject_libdir], required: false) +if not gio.found() + gio = dependency('gio-2.0') +endif +dependencies = [ + arrow, + gobject, + gio, +] +libarrow_glib = library('arrow-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +arrow_glib = declare_dependency(link_with: libarrow_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header) + +pkgconfig.generate(libarrow_glib, + filebase: meson.project_name(), + name: 'Apache Arrow GLib', + description: 'C API for Apache Arrow based on GLib', + version: version, + requires: ['gobject-2.0', 'arrow']) +if have_arrow_orc + pkgconfig.generate(filebase: 'arrow-orc-glib', + name: 'Apache Arrow GLib ORC', + description: 'ORC modules for Apache Arrow GLib', + version: version, + requires: ['arrow-glib']) +endif + +if have_gi + arrow_glib_gir = gnome.generate_gir(libarrow_glib, + sources: sources + c_headers + enums, + namespace: 'Arrow', + nsversion: api_version, + identifier_prefix: 'GArrow', + symbol_prefix: 'garrow', + export_packages: 'arrow-glib', + includes: [ + 'GObject-2.0', + 'Gio-2.0', + ], + install: true, + extra_args: [ + '--warn-all', + ]) +endif diff --git a/src/arrow/c_glib/arrow-glib/metadata-version.cpp b/src/arrow/c_glib/arrow-glib/metadata-version.cpp new file mode 100644 index 000000000..69cbaec37 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/metadata-version.cpp @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/metadata-version.hpp> + +/** + * SECTION: metadata-version + * @title: GArrowMetadataVersion + * @short_description: Metadata version mapping between Arrow and arrow-glib + * + * #GArrowMetadataVersion provides metadata versions corresponding + * to `arrow::ipc::MetadataVersion` values. + */ + +GArrowMetadataVersion +garrow_metadata_version_from_raw(arrow::ipc::MetadataVersion version) +{ + switch (version) { + case arrow::ipc::MetadataVersion::V1: + return GARROW_METADATA_VERSION_V1; + case arrow::ipc::MetadataVersion::V2: + return GARROW_METADATA_VERSION_V2; + case arrow::ipc::MetadataVersion::V3: + return GARROW_METADATA_VERSION_V3; + default: + return GARROW_METADATA_VERSION_V3; + } +} + +arrow::ipc::MetadataVersion +garrow_metadata_version_to_raw(GArrowMetadataVersion version) +{ + switch (version) { + case GARROW_METADATA_VERSION_V1: + return arrow::ipc::MetadataVersion::V1; + case GARROW_METADATA_VERSION_V2: + return arrow::ipc::MetadataVersion::V2; + case GARROW_METADATA_VERSION_V3: + return arrow::ipc::MetadataVersion::V3; + default: + return arrow::ipc::MetadataVersion::V3; + } +} diff --git a/src/arrow/c_glib/arrow-glib/metadata-version.h b/src/arrow/c_glib/arrow-glib/metadata-version.h new file mode 100644 index 000000000..d902a3949 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/metadata-version.h @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib-object.h> + +G_BEGIN_DECLS + +/** + * GArrowMetadataVersion: + * @GARROW_METADATA_VERSION_V1: Version 1. + * @GARROW_METADATA_VERSION_V2: Version 2. + * @GARROW_METADATA_VERSION_V3: Version 3. + * + * They are corresponding to `arrow::ipc::MetadataVersion::type` + * values. + */ +typedef enum { + GARROW_METADATA_VERSION_V1, + GARROW_METADATA_VERSION_V2, + GARROW_METADATA_VERSION_V3 +} GArrowMetadataVersion; + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/metadata-version.hpp b/src/arrow/c_glib/arrow-glib/metadata-version.hpp new file mode 100644 index 000000000..7b3865e59 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/metadata-version.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/ipc/api.h> + +#include <arrow-glib/metadata-version.h> + +GArrowMetadataVersion garrow_metadata_version_from_raw(arrow::ipc::MetadataVersion version); +arrow::ipc::MetadataVersion garrow_metadata_version_to_raw(GArrowMetadataVersion version); diff --git a/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp b/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp new file mode 100644 index 000000000..084198a82 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/error.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/orc-file-reader.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: orc-file-reader + * @section_id: orc-file-reader + * @title: ORC reader + * @include: arrow-glib/orc-file-reader.h + * + * #GArrowORCFileReader is a class for reading stripes in ORC file + * format from input. + */ + +typedef struct GArrowORCFileReaderPrivate_ { + GArrowSeekableInputStream *input; + arrow::adapters::orc::ORCFileReader *orc_file_reader; + GArray *field_indices; +} GArrowORCFileReaderPrivate; + +enum { + PROP_0, + PROP_INPUT, + PROP_ORC_FILE_READER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader, + garrow_orc_file_reader, + G_TYPE_OBJECT); + +#define GARROW_ORC_FILE_READER_GET_PRIVATE(obj) \ + static_cast<GArrowORCFileReaderPrivate *>( \ + garrow_orc_file_reader_get_instance_private( \ + GARROW_ORC_FILE_READER(obj))) + +static void +garrow_orc_file_reader_dispose(GObject *object) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); + + if (priv->input) { + g_object_unref(priv->input); + priv->input = NULL; + } + + G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object); +} + +static void +garrow_orc_file_reader_finalize(GObject *object) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); + + delete priv->orc_file_reader; + + if (priv->field_indices) { + g_array_free(priv->field_indices, TRUE); + } + + G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object); +} + +static void +garrow_orc_file_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INPUT: + priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value)); + break; + case PROP_ORC_FILE_READER: + priv->orc_file_reader = + 
static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_orc_file_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_INPUT: + g_value_set_object(value, priv->input); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_orc_file_reader_init(GArrowORCFileReader *object) +{ +} + +static void +garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_orc_file_reader_dispose; + gobject_class->finalize = garrow_orc_file_reader_finalize; + gobject_class->set_property = garrow_orc_file_reader_set_property; + gobject_class->get_property = garrow_orc_file_reader_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("input", + "Input", + "The input stream", + GARROW_TYPE_SEEKABLE_INPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_INPUT, spec); + + spec = g_param_spec_pointer("orc-file-reader", + "arrow::adapters::orc::ORCFileReader", + "The raw arrow::adapters::orc::ORCFileReader *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec); +} + + +/** + * garrow_orc_file_reader_new: + * @input: The seekable input stream to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowORCFileReader + * or %NULL on error. 
+ * + * Since: 0.10.0 + */ +GArrowORCFileReader * +garrow_orc_file_reader_new(GArrowSeekableInputStream *input, + GError **error) +{ + auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input); + auto pool = arrow::default_memory_pool(); + auto arrow_reader_result = + arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file, + pool); + if (garrow::check(error, arrow_reader_result, "[orc-file-reader][new]")) { + return garrow_orc_file_reader_new_raw(input, + (*arrow_reader_result).release()); + } else { + return NULL; + } +} + +/** + * garrow_orc_file_reader_set_field_indexes: + * @reader: A #GArrowORCFileReader. + * @field_indexes: (nullable) (array length=n_field_indexes): + * The field indexes to be read. + * @n_field_indexes: The number of the specified indexes. + * + * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_orc_file_reader_set_field_indices() instead. + */ +void +garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, + const gint *field_indexes, + guint n_field_indexes) +{ + garrow_orc_file_reader_set_field_indices(reader, + field_indexes, + n_field_indexes); +} + +/** + * garrow_orc_file_reader_set_field_indices: + * @reader: A #GArrowORCFileReader. + * @field_indices: (nullable) (array length=n_field_indices): + * The field indices to be read. + * @n_field_indices: The number of the specified indices. 
+ * + * Since: 0.12.0 + */ +void +garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, + const gint *field_indices, + guint n_field_indices) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); + if (priv->field_indices) { + g_array_free(priv->field_indices, TRUE); + } + if (n_field_indices == 0) { + priv->field_indices = NULL; + } else { + priv->field_indices = g_array_sized_new(FALSE, + FALSE, + sizeof(gint), + n_field_indices); + g_array_append_vals(priv->field_indices, field_indices, n_field_indices); + } +} + +/** + * garrow_orc_file_reader_get_field_indexes: + * @reader: A #GArrowORCFileReader. + * @n_field_indexes: The number of the specified indexes. + * + * Returns: (nullable) (array length=n_field_indexes) (transfer none): + * The field indexes to be read. + * + * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_orc_file_reader_get_field_indices() instead. + */ +const gint * +garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, + guint *n_field_indexes) +{ + return garrow_orc_file_reader_get_field_indices(reader, n_field_indexes); +} + +/** + * garrow_orc_file_reader_get_field_indices: + * @reader: A #GArrowORCFileReader. + * @n_field_indices: The number of the specified indices. + * + * Returns: (nullable) (array length=n_field_indices) (transfer none): + * The field indices to be read. + * + * Since: 0.12.0 + */ +const gint * +garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, + guint *n_field_indices) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); + if (priv->field_indices) { + *n_field_indices = priv->field_indices->len; + return reinterpret_cast<gint *>(priv->field_indices->data); + } else { + *n_field_indices = 0; + return NULL; + } +} + +/** + * garrow_orc_file_reader_read_type: + * @reader: A #GArrowORCFileReader. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable) (transfer full): A newly read type as + * #GArrowSchema or %NULL on error. + * + * Since: 0.10.0 + */ +GArrowSchema * +garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, + GError **error) +{ + auto arrow_reader = garrow_orc_file_reader_get_raw(reader); + auto arrow_schema_result = arrow_reader->ReadSchema(); + if (garrow::check(error, + arrow_schema_result, + "[orc-file-reader][read-type]")) { + auto arrow_schema = *arrow_schema_result; + return garrow_schema_new_raw(&arrow_schema); + } else { + return NULL; + } +} + +/** + * garrow_orc_file_reader_read_stripes: + * @reader: A #GArrowORCFileReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A newly read stripes as + * #GArrowTable or %NULL on error. + * + * Since: 0.10.0 + */ +GArrowTable * +garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, + GError **error) +{ + auto arrow_reader = garrow_orc_file_reader_get_raw(reader); + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); + if (priv->field_indices) { + std::vector<int> arrow_field_indices; + auto field_indices = priv->field_indices; + for (guint i = 0; i < field_indices->len; ++i) { + arrow_field_indices.push_back(g_array_index(field_indices, gint, i)); + } + auto arrow_table_result = arrow_reader->Read(arrow_field_indices); + if (garrow::check(error, + arrow_table_result, + "[orc-file-reader][read-stripes]")) { + auto arrow_table = *arrow_table_result; + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } else { + auto arrow_table_result = arrow_reader->Read(); + if (garrow::check(error, + arrow_table_result, + "[orc-file-reader][read-stripes]")) { + auto arrow_table = *arrow_table_result; + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } +} + +/** + * garrow_orc_file_reader_read_stripe: + * @reader: A #GArrowORCFileReader. + * @i: The stripe index to be read. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A newly read stripe as + * #GArrowRecordBatch or %NULL on error. + * + * Since: 0.10.0 + */ +GArrowRecordBatch * +garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader, + gint64 i, + GError **error) +{ + auto arrow_reader = garrow_orc_file_reader_get_raw(reader); + if (i < 0) { + i += arrow_reader->NumberOfStripes(); + } + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); + if (priv->field_indices) { + std::vector<int> arrow_field_indices; + auto field_indices = priv->field_indices; + for (guint j = 0; j < field_indices->len; ++j) { + arrow_field_indices.push_back(g_array_index(field_indices, gint, j)); + } + std::shared_ptr<arrow::RecordBatch> arrow_record_batch; + auto arrow_record_batch_result = + arrow_reader->ReadStripe(i, arrow_field_indices); + if (garrow::check(error, + arrow_record_batch_result, + "[orc-file-reader][read-stripe]")) { + auto arrow_record_batch = *arrow_record_batch_result; + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } + } else { + auto arrow_record_batch_result = arrow_reader->ReadStripe(i); + if (garrow::check(error, + arrow_record_batch_result, + "[orc-file-reader][read-stripe]")) { + auto arrow_record_batch = *arrow_record_batch_result; + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } + } +} + +/** + * garrow_orc_file_reader_get_n_stripes: + * @reader: A #GArrowORCFileReader. + * + * Returns: The number of stripes in the file. + * + * Since: 0.10.0 + */ +gint64 +garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader) +{ + auto arrow_reader = garrow_orc_file_reader_get_raw(reader); + return arrow_reader->NumberOfStripes(); +} + +/** + * garrow_orc_file_reader_get_n_rows: + * @reader: A #GArrowORCFileReader. + * + * Returns: The number of rows in the file. 
+ * + * Since: 0.10.0 + */ +gint64 +garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader) +{ + auto arrow_reader = garrow_orc_file_reader_get_raw(reader); + return arrow_reader->NumberOfRows(); +} + + +G_END_DECLS + + +GArrowORCFileReader * +garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input, + arrow::adapters::orc::ORCFileReader *arrow_reader) +{ + auto reader = + GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER, + "input", input, + "orc-file-reader", arrow_reader, + NULL)); + return reader; +} + +arrow::adapters::orc::ORCFileReader * +garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader) +{ + auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); + return priv->orc_file_reader; +} diff --git a/src/arrow/c_glib/arrow-glib/orc-file-reader.h b/src/arrow/c_glib/arrow-glib/orc-file-reader.h new file mode 100644 index 000000000..9551d52e0 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/orc-file-reader.h @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/reader.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_ORC_FILE_READER (garrow_orc_file_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowORCFileReader, + garrow_orc_file_reader, + GARROW, + ORC_FILE_READER, + GObject) +struct _GArrowORCFileReaderClass +{ + GObjectClass parent_class; +}; + +GArrowORCFileReader * +garrow_orc_file_reader_new(GArrowSeekableInputStream *file, + GError **error); + +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices) +void +garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, + const gint *field_indexes, + guint n_field_indexes); +#endif +GARROW_AVAILABLE_IN_0_12 +void +garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, + const gint *field_indices, + guint n_field_indices); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices) +const gint * +garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, + guint *n_field_indexes); +#endif +GARROW_AVAILABLE_IN_0_12 +const gint * +garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, + guint *n_field_indices); +GArrowSchema * +garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, + GError **error); +GArrowTable * +garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, + GError **error); +GArrowRecordBatch * +garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader, + gint64 i, + GError **error); +gint64 garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader); +gint64 garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/orc-file-reader.hpp b/src/arrow/c_glib/arrow-glib/orc-file-reader.hpp new file mode 100644 index 000000000..417129052 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/orc-file-reader.hpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + 
* or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/adapters/orc/adapter.h> + +#include <arrow-glib/reader.hpp> +#include <arrow-glib/orc-file-reader.h> + +GArrowORCFileReader * +garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input, + arrow::adapters::orc::ORCFileReader *arrow_reader); +arrow::adapters::orc::ORCFileReader * +garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader); diff --git a/src/arrow/c_glib/arrow-glib/output-stream.cpp b/src/arrow/c_glib/arrow-glib/output-stream.cpp new file mode 100644 index 000000000..9eaee9a81 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/output-stream.cpp @@ -0,0 +1,763 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow/io/memory.h> +#include <arrow/ipc/writer.h> + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/codec.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/file.hpp> +#include <arrow-glib/ipc-options.hpp> +#include <arrow-glib/output-stream.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/tensor.hpp> +#include <arrow-glib/writable.hpp> + +#include <iostream> +#include <sstream> + +G_BEGIN_DECLS + +/** + * SECTION: output-stream + * @section_id: output-stream-classes + * @title: Output stream classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowOutputStream is an interface for stream output. Stream + * output is file based and writable + * + * #GArrowFileOutputStream is a class for file output stream. + * + * #GArrowBufferOutputStream is a class for buffer output stream. + * + * #GArrowGIOOutputStream is a class for `GOutputStream` based output + * stream. + * + * #GArrowCompressedOutputStream is a class to write compressed data to + * output stream. 
+ */ + +typedef struct GArrowOutputStreamPrivate_ { + std::shared_ptr<arrow::io::OutputStream> output_stream; +} GArrowOutputStreamPrivate; + +enum { + PROP_0, + PROP_OUTPUT_STREAM +}; + +static std::shared_ptr<arrow::io::FileInterface> +garrow_output_stream_get_raw_file_interface(GArrowFile *file) +{ + auto output_stream = GARROW_OUTPUT_STREAM(file); + auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); + return arrow_output_stream; +} + +static void +garrow_output_stream_file_interface_init(GArrowFileInterface *iface) +{ + iface->get_raw = garrow_output_stream_get_raw_file_interface; +} + +static std::shared_ptr<arrow::io::Writable> +garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable) +{ + auto output_stream = GARROW_OUTPUT_STREAM(writable); + auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); + return arrow_output_stream; +} + +static void +garrow_output_stream_writable_interface_init(GArrowWritableInterface *iface) +{ + iface->get_raw = garrow_output_stream_get_raw_writable_interface; +} + +G_DEFINE_TYPE_WITH_CODE(GArrowOutputStream, + garrow_output_stream, + G_TYPE_OBJECT, + G_ADD_PRIVATE(GArrowOutputStream) + G_IMPLEMENT_INTERFACE(GARROW_TYPE_FILE, + garrow_output_stream_file_interface_init) + G_IMPLEMENT_INTERFACE(GARROW_TYPE_WRITABLE, + garrow_output_stream_writable_interface_init)); + +#define GARROW_OUTPUT_STREAM_GET_PRIVATE(obj) \ + static_cast<GArrowOutputStreamPrivate *>( \ + garrow_output_stream_get_instance_private( \ + GARROW_OUTPUT_STREAM(obj))) + +static void +garrow_output_stream_finalize(GObject *object) +{ + auto priv = GARROW_OUTPUT_STREAM_GET_PRIVATE(object); + + priv->output_stream.~shared_ptr(); + + G_OBJECT_CLASS(garrow_output_stream_parent_class)->finalize(object); +} + +static void +garrow_output_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_OUTPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + 
case PROP_OUTPUT_STREAM: + priv->output_stream = + *static_cast<std::shared_ptr<arrow::io::OutputStream> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_output_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_output_stream_init(GArrowOutputStream *object) +{ + auto priv = GARROW_OUTPUT_STREAM_GET_PRIVATE(object); + new(&priv->output_stream) std::shared_ptr<arrow::io::OutputStream>; +} + +static void +garrow_output_stream_class_init(GArrowOutputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_output_stream_finalize; + gobject_class->set_property = garrow_output_stream_set_property; + gobject_class->get_property = garrow_output_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("output-stream", + "io::OutputStream", + "The raw std::shared<arrow::io::OutputStream> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_OUTPUT_STREAM, spec); +} + +/** + * garrow_output_stream_align: + * @stream: A #GArrowOutputStream. + * @alignment: The byte multiple for the metadata prefix, usually 8 + * or 64, to ensure the body starts on a multiple of that alignment. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 0.11.0 + */ +gboolean +garrow_output_stream_align(GArrowOutputStream *stream, + gint32 alignment, + GError **error) +{ + auto arrow_stream = garrow_output_stream_get_raw(stream); + auto status = arrow::ipc::AlignStream(arrow_stream.get(), alignment); + return garrow::check(error, status, "[output-stream][align]"); +} + +/** + * garrow_output_stream_write_tensor: + * @stream: A #GArrowOutputStream. + * @tensor: A #GArrowTensor to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The number of written bytes on success, -1 on error. + * + * Since: 0.4.0 + */ +gint64 +garrow_output_stream_write_tensor(GArrowOutputStream *stream, + GArrowTensor *tensor, + GError **error) +{ + auto arrow_stream = garrow_output_stream_get_raw(stream); + auto arrow_tensor = garrow_tensor_get_raw(tensor); + int32_t metadata_length; + int64_t body_length; + auto status = arrow::ipc::WriteTensor(*arrow_tensor, + arrow_stream.get(), + &metadata_length, + &body_length); + if (garrow::check(error, status, "[output-stream][write-tensor]")) { + return metadata_length + body_length; + } else { + return -1; + } +} + +/** + * garrow_output_stream_write_record_batch: + * @stream: A #GArrowOutputStream. + * @record_batch: A #GArrowRecordBatch to be written. + * @options: (nullable): A #GArrowWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: The number of written bytes on success, -1 on error. 
+ * + * Since: 1.0.0 + */ +gint64 +garrow_output_stream_write_record_batch(GArrowOutputStream *stream, + GArrowRecordBatch *record_batch, + GArrowWriteOptions *options, + GError **error) +{ + auto arrow_stream = garrow_output_stream_get_raw(stream); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + int64_t buffer_start_offset = 0; + int32_t metadata_length; + int64_t body_length; + arrow::Status status; + if (options) { + auto arrow_options = garrow_write_options_get_raw(options); + status = arrow::ipc::WriteRecordBatch(*arrow_record_batch, + buffer_start_offset, + arrow_stream.get(), + &metadata_length, + &body_length, + *arrow_options); + } else { + auto arrow_options = arrow::ipc::IpcWriteOptions::Defaults(); + status = arrow::ipc::WriteRecordBatch(*arrow_record_batch, + buffer_start_offset, + arrow_stream.get(), + &metadata_length, + &body_length, + arrow_options); + } + if (garrow::check(error, status, "[output-stream][write-record-batch]")) { + return metadata_length + body_length; + } else { + return -1; + } +} + + +G_DEFINE_TYPE(GArrowFileOutputStream, + garrow_file_output_stream, + GARROW_TYPE_OUTPUT_STREAM); + +static void +garrow_file_output_stream_init(GArrowFileOutputStream *file_output_stream) +{ +} + +static void +garrow_file_output_stream_class_init(GArrowFileOutputStreamClass *klass) +{ +} + +/** + * garrow_file_output_stream_new: + * @path: The path of the file output stream. + * @append: Whether the path is opened as append mode or recreate mode. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly opened #GArrowFileOutputStream or + * %NULL on error. 
+ */ +GArrowFileOutputStream * +garrow_file_output_stream_new(const gchar *path, + gboolean append, + GError **error) +{ + auto arrow_file_output_stream_result = + arrow::io::FileOutputStream::Open(std::string(path), append); + if (arrow_file_output_stream_result.ok()) { + auto arrow_file_output_stream = + arrow_file_output_stream_result.ValueOrDie(); + return garrow_file_output_stream_new_raw(&arrow_file_output_stream); + } else { + std::string context("[io][file-output-stream][open]: <"); + context += path; + context += ">"; + garrow::check(error, arrow_file_output_stream_result, context.c_str()); + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowBufferOutputStream, + garrow_buffer_output_stream, + GARROW_TYPE_OUTPUT_STREAM); + +static void +garrow_buffer_output_stream_init(GArrowBufferOutputStream *buffer_output_stream) +{ +} + +static void +garrow_buffer_output_stream_class_init(GArrowBufferOutputStreamClass *klass) +{ +} + +/** + * garrow_buffer_output_stream_new: + * @buffer: The resizable buffer to be output. + * + * Returns: (transfer full): A newly created #GArrowBufferOutputStream. 
+ */ +GArrowBufferOutputStream * +garrow_buffer_output_stream_new(GArrowResizableBuffer *buffer) +{ + auto arrow_buffer = garrow_buffer_get_raw(GARROW_BUFFER(buffer)); + auto arrow_resizable_buffer = + std::static_pointer_cast<arrow::ResizableBuffer>(arrow_buffer); + auto arrow_buffer_output_stream = + std::make_shared<arrow::io::BufferOutputStream>(arrow_resizable_buffer); + return garrow_buffer_output_stream_new_raw(&arrow_buffer_output_stream); +} + +G_END_DECLS + + +namespace garrow { + class GIOOutputStream : public arrow::io::OutputStream { + public: + GIOOutputStream(GOutputStream *output_stream) : + output_stream_(output_stream), + position_(0) { + g_object_ref(output_stream_); + } + + ~GIOOutputStream() { + g_object_unref(output_stream_); + } + + GOutputStream *get_output_stream() { + return output_stream_; + } + + bool closed() const override { + return static_cast<bool>(g_output_stream_is_closed(output_stream_)); + } + + arrow::Status Close() override { + GError *error = NULL; + if (g_output_stream_close(output_stream_, NULL, &error)) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-output-stream][close]"); + } + } + + arrow::Result<int64_t> Tell() const override { + if (G_IS_SEEKABLE(output_stream_)) { + return g_seekable_tell(G_SEEKABLE(output_stream_)); + } else { + return position_; + } + } + + arrow::Status Write(const void *data, + int64_t n_bytes) override { + GError *error = NULL; + gsize n_written_bytes; + auto successed = g_output_stream_write_all(output_stream_, + data, + n_bytes, + &n_written_bytes, + NULL, + &error); + if (successed) { + position_ += n_written_bytes; + return arrow::Status::OK(); + } else { + std::stringstream message("[gio-output-stream][write]"); + message << "[" << n_written_bytes << "/" << n_bytes << "]"; + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + message.str().c_str()); + } + } + + arrow::Status Flush() override { + GError 
*error = NULL; + auto successed = g_output_stream_flush(output_stream_, NULL, &error); + if (successed) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::IOError, + "[gio-output-stream][flush]"); + } + } + + private: + GOutputStream *output_stream_; + int64_t position_; + }; +}; + +G_BEGIN_DECLS + +typedef struct GArrowGIOOutputStreamPrivate_ { + GOutputStream *raw; +} GArrowGIOOutputStreamPrivate; + +enum { + PROP_GIO_RAW = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowGIOOutputStream, + garrow_gio_output_stream, + GARROW_TYPE_OUTPUT_STREAM); + +#define GARROW_GIO_OUTPUT_STREAM_GET_PRIVATE(object) \ + static_cast<GArrowGIOOutputStreamPrivate *>( \ + garrow_gio_output_stream_get_instance_private( \ + GARROW_GIO_OUTPUT_STREAM(object))) + +static void +garrow_gio_output_stream_dispose(GObject *object) +{ + auto priv = GARROW_GIO_OUTPUT_STREAM_GET_PRIVATE(object); + + if (priv->raw) { + g_object_unref(priv->raw); + priv->raw = nullptr; + } + + G_OBJECT_CLASS(garrow_gio_output_stream_parent_class)->dispose(object); +} + +static void +garrow_gio_output_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_GIO_OUTPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_GIO_RAW: + priv->raw = G_OUTPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_gio_output_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_GIO_OUTPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_GIO_RAW: + g_value_set_object(value, priv->raw); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_gio_output_stream_init(GArrowGIOOutputStream *object) +{ +} + +static void 
+garrow_gio_output_stream_class_init(GArrowGIOOutputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_gio_output_stream_dispose; + gobject_class->set_property = garrow_gio_output_stream_set_property; + gobject_class->get_property = garrow_gio_output_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("raw", + "Raw", + "The raw GOutputStream *", + G_TYPE_OUTPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_GIO_RAW, spec); +} + +/** + * garrow_gio_output_stream_new: + * @gio_output_stream: The stream to be output. + * + * Returns: (transfer full): A newly created #GArrowGIOOutputStream. + */ +GArrowGIOOutputStream * +garrow_gio_output_stream_new(GOutputStream *gio_output_stream) +{ + auto arrow_output_stream = + std::make_shared<garrow::GIOOutputStream>(gio_output_stream); + auto object = g_object_new(GARROW_TYPE_GIO_OUTPUT_STREAM, + "output-stream", &arrow_output_stream, + "raw", gio_output_stream, + NULL); + auto output_stream = GARROW_GIO_OUTPUT_STREAM(object); + return output_stream; +} + +/** + * garrow_gio_output_stream_get_raw: + * @output_stream: A #GArrowGIOOutputStream. + * + * Returns: (transfer none): The wrapped #GOutputStream. + * + * Since: 0.5.0 + * + * Deprecated: 0.12.0: Use GArrowGIOOutputStream::raw property instead. 
+ */ +GOutputStream * +garrow_gio_output_stream_get_raw(GArrowGIOOutputStream *output_stream) +{ + auto priv = GARROW_GIO_OUTPUT_STREAM_GET_PRIVATE(output_stream); + return priv->raw; +} + +typedef struct GArrowCompressedOutputStreamPrivate_ { + GArrowCodec *codec; + GArrowOutputStream *raw; +} GArrowCompressedOutputStreamPrivate; + +enum { + PROP_CODEC = 1, + PROP_RAW +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCompressedOutputStream, + garrow_compressed_output_stream, + GARROW_TYPE_OUTPUT_STREAM) + +#define GARROW_COMPRESSED_OUTPUT_STREAM_GET_PRIVATE(object) \ + static_cast<GArrowCompressedOutputStreamPrivate *>( \ + garrow_compressed_output_stream_get_instance_private( \ + GARROW_COMPRESSED_OUTPUT_STREAM(object))) + +static void +garrow_compressed_output_stream_dispose(GObject *object) +{ + auto priv = GARROW_COMPRESSED_OUTPUT_STREAM_GET_PRIVATE(object); + + if (priv->codec) { + g_object_unref(priv->codec); + priv->codec = NULL; + } + + if (priv->raw) { + g_object_unref(priv->raw); + priv->raw = NULL; + } + + G_OBJECT_CLASS(garrow_compressed_output_stream_parent_class)->dispose(object); +} + +static void +garrow_compressed_output_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_COMPRESSED_OUTPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CODEC: + priv->codec = GARROW_CODEC(g_value_dup_object(value)); + break; + case PROP_RAW: + priv->raw = GARROW_OUTPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_compressed_output_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_COMPRESSED_OUTPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CODEC: + g_value_set_object(value, priv->codec); + break; + case PROP_RAW: + g_value_set_object(value, priv->raw); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_compressed_output_stream_init(GArrowCompressedOutputStream *object) +{ +} + +static void +garrow_compressed_output_stream_class_init(GArrowCompressedOutputStreamClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_compressed_output_stream_dispose; + gobject_class->set_property = garrow_compressed_output_stream_set_property; + gobject_class->get_property = garrow_compressed_output_stream_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("codec", + "Codec", + "The codec for the stream", + GARROW_TYPE_CODEC, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CODEC, spec); + + spec = g_param_spec_object("raw", + "Raw", + "The underlying raw output stream", + GARROW_TYPE_OUTPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW, spec); +} + +/** + * garrow_compressed_output_stream_new: + * @codec: A #GArrowCodec for compressed data in the @raw. + * @raw: A #GArrowOutputStream that is a sink for compressed data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowCompressedOutputStream. 
+ * + * Since: 0.12.0 + */ +GArrowCompressedOutputStream * +garrow_compressed_output_stream_new(GArrowCodec *codec, + GArrowOutputStream *raw, + GError **error) +{ + auto arrow_codec = garrow_codec_get_raw(codec).get(); + auto arrow_raw = garrow_output_stream_get_raw(raw); + auto arrow_stream = arrow::io::CompressedOutputStream::Make(arrow_codec, + arrow_raw); + if (garrow::check(error, arrow_stream, "[compressed-output-stream][new]")) { + return garrow_compressed_output_stream_new_raw(&(arrow_stream.ValueOrDie()), + codec, + raw); + } else { + return NULL; + } +} + +G_END_DECLS + + +GArrowOutputStream * +garrow_output_stream_new_raw(std::shared_ptr<arrow::io::OutputStream> *arrow_output_stream) +{ + auto output_stream = + GARROW_OUTPUT_STREAM(g_object_new(GARROW_TYPE_OUTPUT_STREAM, + "output-stream", arrow_output_stream, + NULL)); + return output_stream; +} + +std::shared_ptr<arrow::io::OutputStream> +garrow_output_stream_get_raw(GArrowOutputStream *output_stream) +{ + auto priv = GARROW_OUTPUT_STREAM_GET_PRIVATE(output_stream); + return priv->output_stream; +} + + +GArrowFileOutputStream * +garrow_file_output_stream_new_raw(std::shared_ptr<arrow::io::FileOutputStream> *arrow_file_output_stream) +{ + auto file_output_stream = + GARROW_FILE_OUTPUT_STREAM(g_object_new(GARROW_TYPE_FILE_OUTPUT_STREAM, + "output-stream", arrow_file_output_stream, + NULL)); + return file_output_stream; +} + +GArrowBufferOutputStream * +garrow_buffer_output_stream_new_raw(std::shared_ptr<arrow::io::BufferOutputStream> *arrow_buffer_output_stream) +{ + auto buffer_output_stream = + GARROW_BUFFER_OUTPUT_STREAM(g_object_new(GARROW_TYPE_BUFFER_OUTPUT_STREAM, + "output-stream", arrow_buffer_output_stream, + NULL)); + return buffer_output_stream; +} + +GArrowCompressedOutputStream * +garrow_compressed_output_stream_new_raw(std::shared_ptr<arrow::io::CompressedOutputStream> *arrow_raw, + GArrowCodec *codec, + GArrowOutputStream *raw) +{ + auto compressed_output_stream = + 
g_object_new(GARROW_TYPE_COMPRESSED_OUTPUT_STREAM, + "output-stream", arrow_raw, + "codec", codec, + "raw", raw, + NULL); + return GARROW_COMPRESSED_OUTPUT_STREAM(compressed_output_stream); +} + +std::shared_ptr<arrow::io::OutputStream> +garrow_compressed_output_stream_get_raw(GArrowCompressedOutputStream *compressed_output_stream) +{ + auto output_stream = GARROW_OUTPUT_STREAM(compressed_output_stream); + auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); + auto arrow_compressed_output_stream = + std::static_pointer_cast<arrow::io::CompressedOutputStream>(arrow_output_stream); + return arrow_compressed_output_stream->raw(); +} diff --git a/src/arrow/c_glib/arrow-glib/output-stream.h b/src/arrow/c_glib/arrow-glib/output-stream.h new file mode 100644 index 000000000..eeef24891 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/output-stream.h @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <gio/gio.h> + +#include <arrow-glib/buffer.h> +#include <arrow-glib/codec.h> +#include <arrow-glib/ipc-options.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/tensor.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_OUTPUT_STREAM (garrow_output_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowOutputStream, + garrow_output_stream, + GARROW, + OUTPUT_STREAM, + GObject) +struct _GArrowOutputStreamClass +{ + GObjectClass parent_class; +}; + +gboolean garrow_output_stream_align(GArrowOutputStream *stream, + gint32 alignment, + GError **error); +gint64 garrow_output_stream_write_tensor(GArrowOutputStream *stream, + GArrowTensor *tensor, + GError **error); +GARROW_AVAILABLE_IN_1_0 +gint64 +garrow_output_stream_write_record_batch(GArrowOutputStream *stream, + GArrowRecordBatch *record_batch, + GArrowWriteOptions *options, + GError **error); + + +#define GARROW_TYPE_FILE_OUTPUT_STREAM \ + (garrow_file_output_stream_get_type()) +#define GARROW_FILE_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_FILE_OUTPUT_STREAM, \ + GArrowFileOutputStream)) +#define GARROW_FILE_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_FILE_OUTPUT_STREAM, \ + GArrowFileOutputStreamClass)) +#define GARROW_IS_FILE_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_FILE_OUTPUT_STREAM)) +#define GARROW_IS_FILE_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_FILE_OUTPUT_STREAM)) +#define GARROW_FILE_OUTPUT_STREAM_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_FILE_OUTPUT_STREAM, \ + GArrowFileOutputStreamClass)) + +typedef struct _GArrowFileOutputStream GArrowFileOutputStream; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowFileOutputStreamClass GArrowFileOutputStreamClass; +#endif + +/** + * GArrowFileOutputStream: + * + * It wraps `arrow::io::FileOutputStream`. 
+ */ +struct _GArrowFileOutputStream +{ + /*< private >*/ + GArrowOutputStream parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowFileOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; +#endif + +GType garrow_file_output_stream_get_type(void) G_GNUC_CONST; + +GArrowFileOutputStream *garrow_file_output_stream_new(const gchar *path, + gboolean append, + GError **error); + + +#define GARROW_TYPE_BUFFER_OUTPUT_STREAM \ + (garrow_buffer_output_stream_get_type()) +#define GARROW_BUFFER_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_BUFFER_OUTPUT_STREAM, \ + GArrowBufferOutputStream)) +#define GARROW_BUFFER_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_BUFFER_OUTPUT_STREAM, \ + GArrowBufferOutputStreamClass)) +#define GARROW_IS_BUFFER_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_BUFFER_OUTPUT_STREAM)) +#define GARROW_IS_BUFFER_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_BUFFER_OUTPUT_STREAM)) +#define GARROW_BUFFER_OUTPUT_STREAM_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_BUFFER_OUTPUT_STREAM, \ + GArrowBufferOutputStreamClass)) + +typedef struct _GArrowBufferOutputStream GArrowBufferOutputStream; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowBufferOutputStreamClass GArrowBufferOutputStreamClass; +#endif + +/** + * GArrowBufferOutputStream: + * + * It wraps `arrow::io::BufferOutputStream`. 
+ */ +struct _GArrowBufferOutputStream +{ + /*< private >*/ + GArrowOutputStream parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowBufferOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; +#endif + +GType garrow_buffer_output_stream_get_type(void) G_GNUC_CONST; + +GArrowBufferOutputStream *garrow_buffer_output_stream_new(GArrowResizableBuffer *buffer); + + +#define GARROW_TYPE_GIO_OUTPUT_STREAM \ + (garrow_gio_output_stream_get_type()) +#define GARROW_GIO_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_GIO_OUTPUT_STREAM, \ + GArrowGIOOutputStream)) +#define GARROW_GIO_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_GIO_OUTPUT_STREAM, \ + GArrowGIOOutputStreamClass)) +#define GARROW_IS_GIO_OUTPUT_STREAM(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_GIO_OUTPUT_STREAM)) +#define GARROW_IS_GIO_OUTPUT_STREAM_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_GIO_OUTPUT_STREAM)) +#define GARROW_GIO_OUTPUT_STREAM_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_GIO_OUTPUT_STREAM, \ + GArrowGIOOutputStreamClass)) + +typedef struct _GArrowGIOOutputStream GArrowGIOOutputStream; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowGIOOutputStreamClass GArrowGIOOutputStreamClass; +#endif + +/** + * GArrowGIOOutputStream: + * + * It's an output stream for `GOutputStream`. 
+ */ +struct _GArrowGIOOutputStream +{ + /*< private >*/ + GArrowOutputStream parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowGIOOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; +#endif + +GType garrow_gio_output_stream_get_type(void) G_GNUC_CONST; + +GArrowGIOOutputStream *garrow_gio_output_stream_new(GOutputStream *gio_output_stream); +#ifndef GARROW_DISABLE_DEPRECATED +G_GNUC_DEPRECATED +GOutputStream * +garrow_gio_output_stream_get_raw(GArrowGIOOutputStream *output_stream); +#endif + +#define GARROW_TYPE_COMPRESSED_OUTPUT_STREAM \ + (garrow_compressed_output_stream_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCompressedOutputStream, + garrow_compressed_output_stream, + GARROW, + COMPRESSED_OUTPUT_STREAM, + GArrowOutputStream) +struct _GArrowCompressedOutputStreamClass +{ + GArrowOutputStreamClass parent_class; +}; + +GArrowCompressedOutputStream * +garrow_compressed_output_stream_new(GArrowCodec *codec, + GArrowOutputStream *raw, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/output-stream.hpp b/src/arrow/c_glib/arrow-glib/output-stream.hpp new file mode 100644 index 000000000..b39b3bdfc --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/output-stream.hpp @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/compressed.h> +#include <arrow/io/file.h> +#include <arrow/io/memory.h> + +#include <arrow-glib/output-stream.h> + +GArrowOutputStream *garrow_output_stream_new_raw(std::shared_ptr<arrow::io::OutputStream> *arrow_output_stream); +std::shared_ptr<arrow::io::OutputStream> garrow_output_stream_get_raw(GArrowOutputStream *output_stream); + + +GArrowFileOutputStream *garrow_file_output_stream_new_raw(std::shared_ptr<arrow::io::FileOutputStream> *arrow_file_output_stream); +GArrowBufferOutputStream *garrow_buffer_output_stream_new_raw(std::shared_ptr<arrow::io::BufferOutputStream> *arrow_buffer_output_stream); + +GArrowCompressedOutputStream * +garrow_compressed_output_stream_new_raw(std::shared_ptr<arrow::io::CompressedOutputStream> *arrow_raw, + GArrowCodec *codec, + GArrowOutputStream *raw); +std::shared_ptr<arrow::io::OutputStream> +garrow_compressed_output_stream_get_raw(GArrowCompressedOutputStream *stream); diff --git a/src/arrow/c_glib/arrow-glib/readable.cpp b/src/arrow/c_glib/arrow-glib/readable.cpp new file mode 100644 index 000000000..fbe5270c7 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/readable.cpp @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow/api.h>
+
+#include <arrow-glib/buffer.hpp>
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/readable.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: readable
+ * @title: GArrowReadable
+ * @short_description: Input interface
+ *
+ * #GArrowReadable is an interface for input. Input must be
+ * readable.
+ */
+
+G_DEFINE_INTERFACE(GArrowReadable,
+                   garrow_readable,
+                   G_TYPE_OBJECT)
+
+static void
+garrow_readable_default_init (GArrowReadableInterface *iface)
+{
+  iface->buffer_new_raw = garrow_buffer_new_raw; /* default buffer wrapper; garrow_readable_read() calls it through the interface */
+}
+
+/**
+ * garrow_readable_read:
+ * @readable: A #GArrowReadable.
+ * @n_bytes: The number of bytes to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable): #GArrowBuffer that has read
+ *   data on success, %NULL if there was an error.
+ */
+GArrowBuffer *
+garrow_readable_read(GArrowReadable *readable,
+                     gint64 n_bytes,
+                     GError **error)
+{
+  const auto arrow_readable = garrow_readable_get_raw(readable);
+
+  auto arrow_buffer = arrow_readable->Read(n_bytes);
+  if (garrow::check(error, arrow_buffer, "[readable][read]")) {
+    auto *iface = GARROW_READABLE_GET_IFACE(readable);
+    return iface->buffer_new_raw(&(arrow_buffer.ValueOrDie())); /* wrap with the interface's buffer_new_raw hook */
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_readable_read_bytes:
+ * @readable: A #GArrowReadable.
+ * @n_bytes: The number of bytes to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * The read buffer is viewed or copied through the default CPU memory
+ * manager before its contents are put into the returned #GBytes.
+ *
+ * Returns: (transfer full) (nullable): #GBytes that has read data on
+ *   success, %NULL if there was an error.
+ *
+ * Since: 0.17.0
+ */
+GBytes *
+garrow_readable_read_bytes(GArrowReadable *readable,
+                           gint64 n_bytes,
+                           GError **error)
+{
+  const auto arrow_readable = garrow_readable_get_raw(readable);
+
+  auto arrow_buffer_result = arrow_readable->Read(n_bytes);
+  if (!garrow::check(error, arrow_buffer_result, "[readable][read-bytes]")) {
+    return NULL;
+  }
+  auto arrow_cpu_buffer_result =
+    arrow::Buffer::ViewOrCopy(*arrow_buffer_result,
+                              arrow::default_cpu_memory_manager());
+  if (!garrow::check(error,
+                     arrow_cpu_buffer_result,
+                     "[readable][read-bytes][view-or-copy]")) {
+    return NULL;
+  }
+  auto arrow_cpu_buffer = *arrow_cpu_buffer_result;
+  return g_bytes_new(arrow_cpu_buffer->data(), /* g_bytes_new() copies the data, so the GBytes does not rely on arrow_cpu_buffer staying alive */
+                     arrow_cpu_buffer->size());
+}
+
+G_END_DECLS
+
+std::shared_ptr<arrow::io::Readable>
+garrow_readable_get_raw(GArrowReadable *readable)
+{
+  auto *iface = GARROW_READABLE_GET_IFACE(readable);
+  return iface->get_raw(readable); /* get_raw is not set in default_init above; presumably each implementor provides it — confirm */
+}
diff --git a/src/arrow/c_glib/arrow-glib/readable.h b/src/arrow/c_glib/arrow-glib/readable.h
new file mode 100644
index 000000000..bb70f4b54
--- /dev/null
+++ b/src/arrow/c_glib/arrow-glib/readable.h
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/buffer.h> +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_READABLE (garrow_readable_get_type()) +G_DECLARE_INTERFACE(GArrowReadable, + garrow_readable, + GARROW, + READABLE, + GObject) + +GArrowBuffer *garrow_readable_read(GArrowReadable *readable, + gint64 n_bytes, + GError **error); +GARROW_AVAILABLE_IN_0_17 +GBytes *garrow_readable_read_bytes(GArrowReadable *readable, + gint64 n_bytes, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/readable.hpp b/src/arrow/c_glib/arrow-glib/readable.hpp new file mode 100644 index 000000000..b002de40d --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/readable.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/interfaces.h> + +#include <arrow-glib/readable.h> + +/** + * GArrowReadableInterface: + * + * It wraps `arrow::io::Readable`. 
+ */ +struct _GArrowReadableInterface +{ + GTypeInterface parent_iface; + + GArrowBuffer *(*buffer_new_raw)(std::shared_ptr<arrow::Buffer> *arrow_buffer); + std::shared_ptr<arrow::io::Readable> (*get_raw)(GArrowReadable *file); +}; + +std::shared_ptr<arrow::io::Readable> garrow_readable_get_raw(GArrowReadable *readable); diff --git a/src/arrow/c_glib/arrow-glib/reader.cpp b/src/arrow/c_glib/arrow-glib/reader.cpp new file mode 100644 index 000000000..980382480 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/reader.cpp @@ -0,0 +1,2322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/chunked-array.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/enums.h> +#include <arrow-glib/error.hpp> +#include <arrow-glib/input-stream.hpp> +#include <arrow-glib/internal-index.hpp> +#include <arrow-glib/metadata-version.hpp> +#include <arrow-glib/reader.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table.hpp> + +#include <arrow/c/bridge.h> + +G_BEGIN_DECLS + +/** + * SECTION: reader + * @section_id: reader-classes + * @title: Reader classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowRecordBatchReader is a base class for reading record batches + * in stream format from input. + * + * #GArrowRecordBatchStreamReader is a class for reading record + * batches in stream format from input synchronously. + * + * #GArrowRecordBatchFileReader is a class for reading record + * batches in file format from input. + * + * #GArrowFeatherFileReader is a class for reading columns in Feather + * file format from input. + * + * #GArrowCSVReader is a class for reading table in CSV format from + * input. + * + * #GArrowJSONReader is a class for reading table in JSON format from + * input. 
+ */
+
+typedef struct GArrowRecordBatchReaderPrivate_ {
+  std::shared_ptr<arrow::ipc::RecordBatchReader> record_batch_reader;
+} GArrowRecordBatchReaderPrivate;
+
+enum {
+  PROP_0,
+  PROP_RECORD_BATCH_READER
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchReader,
+                           garrow_record_batch_reader,
+                           G_TYPE_OBJECT);
+
+#define GARROW_RECORD_BATCH_READER_GET_PRIVATE(obj)         \
+  static_cast<GArrowRecordBatchReaderPrivate *>(            \
+    garrow_record_batch_reader_get_instance_private(        \
+      GARROW_RECORD_BATCH_READER(obj)))
+
+static void
+garrow_record_batch_reader_finalize(GObject *object)
+{
+  auto priv = GARROW_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  priv->record_batch_reader.~shared_ptr(); /* explicit destructor call: pairs with the placement new in _init() */
+
+  G_OBJECT_CLASS(garrow_record_batch_reader_parent_class)->finalize(object);
+}
+
+static void
+garrow_record_batch_reader_set_property(GObject *object,
+                                        guint prop_id,
+                                        const GValue *value,
+                                        GParamSpec *pspec)
+{
+  auto priv = GARROW_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_RECORD_BATCH_READER:
+    priv->record_batch_reader = /* the property value is a pointer to a shared_ptr; the shared_ptr is copied here */
+      *static_cast<std::shared_ptr<arrow::ipc::RecordBatchReader> *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_record_batch_reader_get_property(GObject *object,
+                                        guint prop_id,
+                                        GValue *value,
+                                        GParamSpec *pspec)
+{
+  switch (prop_id) { /* no case labels: every property id only triggers the warning */
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_record_batch_reader_init(GArrowRecordBatchReader *object)
+{
+  auto priv = GARROW_RECORD_BATCH_READER_GET_PRIVATE(object);
+  new(&priv->record_batch_reader) std::shared_ptr<arrow::ipc::RecordBatchReader>; /* construct the C++ member in place inside the instance private struct */
+}
+
+static void
+garrow_record_batch_reader_class_init(GArrowRecordBatchReaderClass *klass)
+{
+  GObjectClass *gobject_class;
+  GParamSpec *spec;
+
+  gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = garrow_record_batch_reader_finalize;
+  gobject_class->set_property = garrow_record_batch_reader_set_property;
+  gobject_class->get_property = garrow_record_batch_reader_get_property;
+
+  spec = g_param_spec_pointer("record-batch-reader",
+                              "arrow::ipc::RecordBatchReader",
+                              "The raw std::shared<arrow::ipc::RecordBatchRecordBatchReader> *", /* NOTE(review): blurb text repeats "RecordBatch"; it is a runtime string, left unchanged here */
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER, spec);
+}
+
+/**
+ * garrow_record_batch_reader_import:
+ * @c_abi_array_stream: (not nullable): A `struct ArrowArrayStream *`.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable): An imported
+ *   #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ *   You don't need to release the passed `struct ArrowArrayStream *`,
+ *   even if this function reports an error.
+ *
+ * Since: 6.0.0
+ */
+GArrowRecordBatchReader *
+garrow_record_batch_reader_import(gpointer c_abi_array_stream, GError **error)
+{
+  auto arrow_reader_result =
+    arrow::ImportRecordBatchReader(
+      static_cast<ArrowArrayStream *>(c_abi_array_stream));
+  if (garrow::check(error,
+                    arrow_reader_result,
+                    "[record-batch-reader][import]")) {
+    return garrow_record_batch_reader_new_raw(&(*arrow_reader_result));
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_record_batch_reader_new:
+ * @record_batches: (element-type GArrowRecordBatch):
+ *   A list of #GArrowRecordBatch.
+ * @schema: (nullable): A #GArrowSchema to conform to.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: A newly created #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GArrowRecordBatchReader *
+garrow_record_batch_reader_new(GList *record_batches,
+                               GArrowSchema *schema,
+                               GError **error)
+{
+  std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches;
+  for (auto node = record_batches; node; node = node->next) {
+    auto record_batch = GARROW_RECORD_BATCH(node->data);
+    arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch));
+  }
+  std::shared_ptr<arrow::Schema> arrow_schema; /* stays empty when @schema is NULL */
+  if (schema) {
+    arrow_schema = garrow_schema_get_raw(schema);
+  }
+  auto arrow_reader_result =
+    arrow::RecordBatchReader::Make(arrow_record_batches, arrow_schema);
+  if (garrow::check(error,
+                    arrow_reader_result,
+                    "[record-batch-stream-reader][new]")) { /* NOTE(review): tag says "record-batch-stream-reader" while the other functions of this class use "[record-batch-reader]" — confirm */
+    return garrow_record_batch_reader_new_raw(&*arrow_reader_result);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_record_batch_reader_export:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable): An exported
+ *   #GArrowRecordBatchReader as `struct ArrowArrayStream *` on
+ *   success, %NULL on error.
+ *
+ *   It should be freed with the `ArrowArrayStream::release` callback then
+ *   g_free() when no longer needed.
+ *
+ * Since: 6.0.0
+ */
+gpointer
+garrow_record_batch_reader_export(GArrowRecordBatchReader *reader,
+                                  GError **error)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  auto c_abi_array_stream = g_new(ArrowArrayStream, 1);
+  auto status = arrow::ExportRecordBatchReader(arrow_reader,
+                                               c_abi_array_stream);
+  if (garrow::check(error, status, "[record-batch-reader][export]")) {
+    return c_abi_array_stream;
+  } else {
+    g_free(c_abi_array_stream); /* export failed: free the struct allocated with g_new() above */
+    return NULL;
+  }
+}
+
+/**
+ * garrow_record_batch_reader_get_schema:
+ * @reader: A #GArrowRecordBatchReader.
+ *
+ * Returns: (transfer full): The schema in the stream.
+ *
+ * Since: 0.4.0
+ */
+GArrowSchema *
+garrow_record_batch_reader_get_schema(GArrowRecordBatchReader *reader)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  auto arrow_schema = arrow_reader->schema();
+  return garrow_schema_new_raw(&arrow_schema);
+}
+
+/**
+ * garrow_record_batch_reader_get_next_record_batch:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The next record batch in the stream or %NULL on end of stream
+ *   or on error.
+ *
+ * Since: 0.4.0
+ *
+ * Deprecated: 0.5.0:
+ *   Use garrow_record_batch_reader_read_next() instead.
+ */
+GArrowRecordBatch *
+garrow_record_batch_reader_get_next_record_batch(GArrowRecordBatchReader *reader,
+                                                 GError **error)
+{
+  return garrow_record_batch_reader_read_next(reader, error); /* deprecated alias: plain forwarder */
+}
+
+/**
+ * garrow_record_batch_reader_read_next_record_batch:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The next record batch in the stream or %NULL on end of stream
+ *   or on error.
+ *
+ * Since: 0.5.0
+ *
+ * Deprecated: 0.8.0:
+ *   Use garrow_record_batch_reader_read_next() instead.
+ */
+GArrowRecordBatch *
+garrow_record_batch_reader_read_next_record_batch(GArrowRecordBatchReader *reader,
+                                                  GError **error)
+{
+  return garrow_record_batch_reader_read_next(reader, error); /* deprecated alias: plain forwarder */
+}
+
+/**
+ * garrow_record_batch_reader_read_next:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The next record batch in the stream or %NULL on end of stream
+ *   or on error.
+ *
+ * Since: 0.8.0
+ */
+GArrowRecordBatch *
+garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader,
+                                     GError **error)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
+  auto status = arrow_reader->ReadNext(&arrow_record_batch);
+
+  if (garrow_error_check(error,
+                         status,
+                         "[record-batch-reader][read-next]")) {
+    if (arrow_record_batch == nullptr) { /* successful read that produced no batch: reported as NULL */
+      return NULL;
+    } else {
+      return garrow_record_batch_new_raw(&arrow_record_batch);
+    }
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_record_batch_reader_read_all:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   All record batches in the stream as a #GArrowTable, or %NULL
+ *   on error.
+ *
+ * Since: 6.0.0
+ */
+GArrowTable *
+garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader,
+                                    GError **error)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  std::shared_ptr<arrow::Table> arrow_table;
+  auto status = arrow_reader->ReadAll(&arrow_table);
+
+  if (garrow::check(error,
+                    status,
+                    "[record-batch-reader][read-all]")) {
+    return garrow_table_new_raw(&arrow_table);
+  } else {
+    return NULL;
+  }
+}
+
+
+G_DEFINE_TYPE(GArrowTableBatchReader,
+              garrow_table_batch_reader,
+              GARROW_TYPE_RECORD_BATCH_READER);
+
+static void
+garrow_table_batch_reader_init(GArrowTableBatchReader *object)
+{ /* nothing to set up: this subtype declares no private struct of its own */
+}
+
+static void
+garrow_table_batch_reader_class_init(GArrowTableBatchReaderClass *klass)
+{ /* no class-level overrides */
+}
+
+/**
+ * garrow_table_batch_reader_new:
+ * @table: The table to be read.
+ *
+ * Returns: A newly created #GArrowTableBatchReader.
+ *
+ * Since: 0.8.0
+ */
+GArrowTableBatchReader *
+garrow_table_batch_reader_new(GArrowTable *table)
+{
+  auto arrow_table = garrow_table_get_raw(table);
+  auto arrow_table_batch_reader =
+    std::make_shared<arrow::TableBatchReader>(*arrow_table); /* NOTE(review): constructed from a reference to *arrow_table; whether the reader keeps the table alive is not visible here — confirm */
+  return garrow_table_batch_reader_new_raw(&arrow_table_batch_reader);
+}
+
+
+G_DEFINE_TYPE(GArrowRecordBatchStreamReader,
+              garrow_record_batch_stream_reader,
+              GARROW_TYPE_RECORD_BATCH_READER);
+
+static void
+garrow_record_batch_stream_reader_init(GArrowRecordBatchStreamReader *object)
+{ /* nothing to set up: this subtype declares no private struct of its own */
+}
+
+static void
+garrow_record_batch_stream_reader_class_init(GArrowRecordBatchStreamReaderClass *klass)
+{ /* no class-level overrides */
+}
+
+/**
+ * garrow_record_batch_stream_reader_new:
+ * @stream: The stream to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowRecordBatchStreamReader
+ *   or %NULL on error.
+ *
+ * Since: 0.4.0
+ */
+GArrowRecordBatchStreamReader *
+garrow_record_batch_stream_reader_new(GArrowInputStream *stream,
+                                      GError **error)
+{
+  using ReaderType = arrow::ipc::RecordBatchStreamReader;
+
+  auto arrow_input_stream = garrow_input_stream_get_raw(stream);
+  auto arrow_reader = ReaderType::Open(arrow_input_stream);
+  if (garrow::check(error, arrow_reader, "[record-batch-stream-reader][open]")) {
+    auto subtype = std::dynamic_pointer_cast<ReaderType>(*arrow_reader); /* cast the opened reader to the concrete stream reader type expected by _new_raw() */
+    return garrow_record_batch_stream_reader_new_raw(&subtype);
+  } else {
+    return NULL;
+  }
+}
+
+
+typedef struct GArrowRecordBatchFileReaderPrivate_ {
+  std::shared_ptr<arrow::ipc::RecordBatchFileReader> record_batch_file_reader;
+} GArrowRecordBatchFileReaderPrivate;
+
+enum {
+  PROP_0_,
+  PROP_RECORD_BATCH_FILE_READER
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchFileReader,
+                           garrow_record_batch_file_reader,
+                           G_TYPE_OBJECT);
+
+#define GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(obj)    \
+  static_cast<GArrowRecordBatchFileReaderPrivate *>(        \
+    garrow_record_batch_file_reader_get_instance_private(   \
+      GARROW_RECORD_BATCH_FILE_READER(obj)))
+
+static void
+garrow_record_batch_file_reader_finalize(GObject *object)
+{
+  auto priv = GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(object);
+
+  priv->record_batch_file_reader.~shared_ptr(); /* explicit destructor call: pairs with the placement new in _init() */
+
+  G_OBJECT_CLASS(garrow_record_batch_file_reader_parent_class)->finalize(object);
+}
+
+static void
+garrow_record_batch_file_reader_set_property(GObject *object,
+                                             guint prop_id,
+                                             const GValue *value,
+                                             GParamSpec *pspec)
+{
+  auto priv = GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_RECORD_BATCH_FILE_READER:
+    priv->record_batch_file_reader = /* the property value is a pointer to a shared_ptr; the shared_ptr is copied here */
+      *static_cast<std::shared_ptr<arrow::ipc::RecordBatchFileReader> *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_record_batch_file_reader_get_property(GObject *object,
+                                             guint prop_id,
+                                             GValue *value,
+                                             GParamSpec *pspec)
+{
+  switch (prop_id) { /* no case labels: every property id only triggers the warning */
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_record_batch_file_reader_init(GArrowRecordBatchFileReader *object)
+{
+  auto priv = GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(object);
+  new(&priv->record_batch_file_reader) /* construct the C++ member in place inside the instance private struct */
+    std::shared_ptr<arrow::ipc::RecordBatchFileReader>;
+}
+
+static void
+garrow_record_batch_file_reader_class_init(GArrowRecordBatchFileReaderClass *klass)
+{
+  GObjectClass *gobject_class;
+  GParamSpec *spec;
+
+  gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = garrow_record_batch_file_reader_finalize;
+  gobject_class->set_property = garrow_record_batch_file_reader_set_property;
+  gobject_class->get_property = garrow_record_batch_file_reader_get_property;
+
+  spec = g_param_spec_pointer("record-batch-file-reader",
+                              "arrow::ipc::RecordBatchFileReader",
+                              "The raw std::shared<arrow::ipc::RecordBatchFileReader> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_FILE_READER, spec);
+}
+
+
+/**
+ * garrow_record_batch_file_reader_new:
+ * @file: The file to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowRecordBatchFileReader
+ *   or %NULL on error.
+ *
+ * Since: 0.4.0
+ */
+GArrowRecordBatchFileReader *
+garrow_record_batch_file_reader_new(GArrowSeekableInputStream *file,
+                                    GError **error)
+{
+  using ReaderType = arrow::ipc::RecordBatchFileReader;
+
+  auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(file);
+  auto arrow_reader = ReaderType::Open(arrow_random_access_file);
+  if (garrow::check(error, arrow_reader, "[record-batch-file-reader][open]")) {
+    auto subtype = std::dynamic_pointer_cast<ReaderType>(*arrow_reader); /* same open-then-cast shape as the stream reader constructor */
+    return garrow_record_batch_file_reader_new_raw(&subtype);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_record_batch_file_reader_get_schema:
+ * @reader: A #GArrowRecordBatchFileReader.
+ *
+ * Returns: (transfer full): The schema in the file.
+ *
+ * Since: 0.4.0
+ */
+GArrowSchema *
+garrow_record_batch_file_reader_get_schema(GArrowRecordBatchFileReader *reader)
+{
+  auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
+  auto arrow_schema = arrow_reader->schema();
+  return garrow_schema_new_raw(&arrow_schema);
+}
+
+/**
+ * garrow_record_batch_file_reader_get_n_record_batches:
+ * @reader: A #GArrowRecordBatchFileReader.
+ *
+ * Returns: The number of record batches in the file.
+ *
+ * Since: 0.4.0
+ */
+guint
+garrow_record_batch_file_reader_get_n_record_batches(GArrowRecordBatchFileReader *reader)
+{
+  auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
+  return arrow_reader->num_record_batches(); /* value is converted to guint on return */
+}
+
+/**
+ * garrow_record_batch_file_reader_get_version:
+ * @reader: A #GArrowRecordBatchFileReader.
+ *
+ * Returns: The format version in the file.
+ *
+ * Since: 0.4.0
+ */
+GArrowMetadataVersion
+garrow_record_batch_file_reader_get_version(GArrowRecordBatchFileReader *reader)
+{
+  auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
+  auto arrow_version = arrow_reader->version();
+  return garrow_metadata_version_from_raw(arrow_version); /* map the Arrow value to the GLib-level GArrowMetadataVersion */
+}
+
+/**
+ * garrow_record_batch_file_reader_get_record_batch:
+ * @reader: A #GArrowRecordBatchFileReader.
+ * @i: The index of the target record batch.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The i-th record batch in the file or %NULL on error.
+ *
+ * Since: 0.4.0
+ *
+ * Deprecated: 0.5.0:
+ *   Use garrow_record_batch_file_reader_read_record_batch() instead.
+ */
+GArrowRecordBatch *
+garrow_record_batch_file_reader_get_record_batch(GArrowRecordBatchFileReader *reader,
+                                                 guint i,
+                                                 GError **error)
+{
+  return garrow_record_batch_file_reader_read_record_batch(reader, i, error); /* deprecated alias: plain forwarder */
+}
+
+/**
+ * garrow_record_batch_file_reader_read_record_batch:
+ * @reader: A #GArrowRecordBatchFileReader.
+ * @i: The index of the target record batch.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The i-th record batch in the file or %NULL on error.
+ * + * Since: 0.5.0 + */ +GArrowRecordBatch * +garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *reader, + guint i, + GError **error) +{ + auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader); + auto arrow_record_batch = arrow_reader->ReadRecordBatch(i); + + if (garrow::check(error, arrow_record_batch, + "[record-batch-file-reader][read-record-batch]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch)); + } else { + return NULL; + } +} + + +typedef struct GArrowFeatherFileReaderPrivate_ { + std::shared_ptr<arrow::ipc::feather::Reader> feather_reader; +} GArrowFeatherFileReaderPrivate; + +enum { + PROP_FEATHER_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFeatherFileReader, + garrow_feather_file_reader, + G_TYPE_OBJECT); + +#define GARROW_FEATHER_FILE_READER_GET_PRIVATE(obj) \ + static_cast<GArrowFeatherFileReaderPrivate *>( \ + garrow_feather_file_reader_get_instance_private( \ + GARROW_FEATHER_FILE_READER(obj))) + +static void +garrow_feather_file_reader_finalize(GObject *object) +{ + auto priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(object); + + priv->feather_reader.~shared_ptr(); + + G_OBJECT_CLASS(garrow_feather_file_reader_parent_class)->finalize(object); +} + +static void +garrow_feather_file_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FEATHER_READER: + priv->feather_reader = + *static_cast<std::shared_ptr<arrow::ipc::feather::Reader> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_feather_file_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void 
+garrow_feather_file_reader_init(GArrowFeatherFileReader *object) +{ +} + +static void +garrow_feather_file_reader_class_init(GArrowFeatherFileReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_feather_file_reader_finalize; + gobject_class->set_property = garrow_feather_file_reader_set_property; + gobject_class->get_property = garrow_feather_file_reader_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("feather-reader", + "arrow::ipc::feather::Reader", + "The raw std::shared<arrow::ipc::feather::Reader> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FEATHER_READER, spec); +} + + +/** + * garrow_feather_file_reader_new: + * @file: The file to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowFeatherFileReader + * or %NULL on error. + * + * Since: 0.4.0 + */ +GArrowFeatherFileReader * +garrow_feather_file_reader_new(GArrowSeekableInputStream *file, + GError **error) +{ + auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(file); + auto reader = arrow::ipc::feather::Reader::Open(arrow_random_access_file); + if (garrow::check(error, reader, "[feather-file-reader][new]")) { + return garrow_feather_file_reader_new_raw(&(*reader)); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_get_version: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: The format version of the file. + * + * Since: 0.4.0 + */ +gint +garrow_feather_file_reader_get_version(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + return arrow_reader->version(); +} + +/** + * garrow_feather_file_reader_read: + * @reader: A #GArrowFeatherFileReader. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (transfer full): The table in the file that has all columns. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, + GError **error) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + std::shared_ptr<arrow::Table> arrow_table; + auto status = arrow_reader->Read(&arrow_table); + if (garrow_error_check(error, status, "[feather-file-reader][read]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_read_indices: + * @reader: A #GArrowFeatherFileReader. + * @indices: (array length=n_indices): The indices of column to be read. + * @n_indices: The number of indices. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The table in the file that has only the + * specified columns. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_feather_file_reader_read_indices(GArrowFeatherFileReader *reader, + const gint *indices, + guint n_indices, + GError **error) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + std::vector<int> cpp_indices(n_indices); + for (guint i = 0; i < n_indices; ++i) { + cpp_indices.push_back(indices[i]); + } + std::shared_ptr<arrow::Table> arrow_table; + auto status = arrow_reader->Read(cpp_indices, &arrow_table); + if (garrow_error_check(error, status, "[feather-file-reader][read-indices]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_read_names: + * @reader: A #GArrowFeatherFileReader. + * @names: (array length=n_names): The names of column to be read. + * @n_names: The number of names. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The table in the file that has only the + * specified columns. 
+ * + * Since: 0.12.0 + */ +GArrowTable * +garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, + const gchar **names, + guint n_names, + GError **error) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + std::vector<std::string> cpp_names; + for (guint i = 0; i < n_names; ++i) { + cpp_names.push_back(names[i]); + } + std::shared_ptr<arrow::Table> arrow_table; + auto status = arrow_reader->Read(cpp_names, &arrow_table); + if (garrow_error_check(error, status, "[feather-file-reader][read-names]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + + +typedef struct GArrowCSVReadOptionsPrivate_ { + arrow::csv::ReadOptions read_options; + arrow::csv::ParseOptions parse_options; + arrow::csv::ConvertOptions convert_options; +} GArrowCSVReadOptionsPrivate; + +enum { + PROP_USE_THREADS = 1, + PROP_BLOCK_SIZE, + PROP_N_SKIP_ROWS, + PROP_DELIMITER, + PROP_IS_QUOTED, + PROP_QUOTE_CHARACTER, + PROP_IS_DOUBLE_QUOTED, + PROP_IS_ESCAPED, + PROP_ESCAPE_CHARACTER, + PROP_ALLOW_NEWLINES_IN_VALUES, + PROP_IGNORE_EMPTY_LINES, + PROP_CHECK_UTF8, + PROP_ALLOW_NULL_STRINGS, + PROP_GENERATE_COLUMN_NAMES +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCSVReadOptions, + garrow_csv_read_options, + G_TYPE_OBJECT) + +#define GARROW_CSV_READ_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowCSVReadOptionsPrivate *>( \ + garrow_csv_read_options_get_instance_private( \ + GARROW_CSV_READ_OPTIONS(object))) + +static void +garrow_csv_read_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_USE_THREADS: + priv->read_options.use_threads = g_value_get_boolean(value); + break; + case PROP_BLOCK_SIZE: + priv->read_options.block_size = g_value_get_int(value); + break; + case PROP_N_SKIP_ROWS: + priv->read_options.skip_rows = g_value_get_uint(value); + break; + case PROP_GENERATE_COLUMN_NAMES: + 
priv->read_options.autogenerate_column_names = g_value_get_boolean(value); + break; + case PROP_DELIMITER: + priv->parse_options.delimiter = g_value_get_schar(value); + break; + case PROP_IS_QUOTED: + priv->parse_options.quoting = g_value_get_boolean(value); + break; + case PROP_QUOTE_CHARACTER: + priv->parse_options.quote_char = g_value_get_schar(value); + break; + case PROP_IS_DOUBLE_QUOTED: + priv->parse_options.double_quote = g_value_get_boolean(value); + break; + case PROP_IS_ESCAPED: + priv->parse_options.escaping = g_value_get_boolean(value); + break; + case PROP_ESCAPE_CHARACTER: + priv->parse_options.escape_char = g_value_get_schar(value); + break; + case PROP_ALLOW_NEWLINES_IN_VALUES: + priv->parse_options.newlines_in_values = g_value_get_boolean(value); + break; + case PROP_IGNORE_EMPTY_LINES: + priv->parse_options.ignore_empty_lines = g_value_get_boolean(value); + break; + case PROP_CHECK_UTF8: + priv->convert_options.check_utf8 = g_value_get_boolean(value); + break; + case PROP_ALLOW_NULL_STRINGS: + priv->convert_options.strings_can_be_null = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_csv_read_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_USE_THREADS: + g_value_set_boolean(value, priv->read_options.use_threads); + break; + case PROP_BLOCK_SIZE: + g_value_set_int(value, priv->read_options.block_size); + break; + case PROP_N_SKIP_ROWS: + g_value_set_uint(value, priv->read_options.skip_rows); + break; + case PROP_GENERATE_COLUMN_NAMES: + g_value_set_boolean(value, priv->read_options.autogenerate_column_names); + break; + case PROP_DELIMITER: + g_value_set_schar(value, priv->parse_options.delimiter); + break; + case PROP_IS_QUOTED: + g_value_set_boolean(value, priv->parse_options.quoting); + break; + case 
PROP_QUOTE_CHARACTER: + g_value_set_schar(value, priv->parse_options.quote_char); + break; + case PROP_IS_DOUBLE_QUOTED: + g_value_set_boolean(value, priv->parse_options.double_quote); + break; + case PROP_IS_ESCAPED: + g_value_set_boolean(value, priv->parse_options.escaping); + break; + case PROP_ESCAPE_CHARACTER: + g_value_set_schar(value, priv->parse_options.escape_char); + break; + case PROP_ALLOW_NEWLINES_IN_VALUES: + g_value_set_boolean(value, priv->parse_options.newlines_in_values); + break; + case PROP_IGNORE_EMPTY_LINES: + g_value_set_boolean(value, priv->parse_options.ignore_empty_lines); + break; + case PROP_CHECK_UTF8: + g_value_set_boolean(value, priv->convert_options.check_utf8); + break; + case PROP_ALLOW_NULL_STRINGS: + g_value_set_boolean(value, priv->convert_options.strings_can_be_null); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_csv_read_options_init(GArrowCSVReadOptions *object) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(object); + priv->read_options = arrow::csv::ReadOptions::Defaults(); + priv->parse_options = arrow::csv::ParseOptions::Defaults(); + priv->convert_options = arrow::csv::ConvertOptions::Defaults(); +} + +static void +garrow_csv_read_options_class_init(GArrowCSVReadOptionsClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_csv_read_options_set_property; + gobject_class->get_property = garrow_csv_read_options_get_property; + + auto read_options = arrow::csv::ReadOptions::Defaults(); + + /** + * GArrowCSVReadOptions:use-threads: + * + * Whether to use the global CPU thread pool. 
+ * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("use-threads", + "Use threads", + "Whether to use the global CPU thread pool", + read_options.use_threads, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_USE_THREADS, spec); + + /** + * GArrowCSVReadOptions:block-size: + * + * Block size we request from the IO layer; also determines the size + * of chunks when #GArrowCSVReadOptions:use-threads is %TRUE. + * + * Since: 0.12.0 + */ + spec = g_param_spec_int("block-size", + "Block size", + "Block size we request from the IO layer; " + "also determines the size of chunks " + "when ::use-threads is TRUE", + 0, + G_MAXINT, + read_options.block_size, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_BLOCK_SIZE, spec); + + /** + * GArrowCSVReadOptions:n-skip-rows: + * + * The number of header rows to skip (not including + * the row of column names, if any) + * + * Since: 0.15.0 + */ + spec = g_param_spec_uint("n-skip-rows", + "N skip rows", + "The number of header rows to skip " + "(not including the row of column names, if any)", + 0, + G_MAXUINT, + read_options.skip_rows, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_N_SKIP_ROWS, spec); + + /** + * GArrowCSVReadOptions:generate_column_names: + * + * Whether to autogenerate column names if #GArrowCSVReadOptions:column-names is empty. + * If %TRUE, column names will be of the form 'f0', 'f1'... + * If %FALSE, column names will be read from the first CSV row + * after #GArrowCSVReadOptions:n-skip-rows. + * + * Since: 0.15.0 + */ + spec = g_param_spec_boolean("generate-column-names", + "Generate column names", + "Whether to autogenerate column names if column-names is empty. " + "If TRUE, column names will be of the form 'f0', 'f1'... 
" + "If FALSE, column names will be read from the first CSV row " + "after n-skip-rows", + read_options.autogenerate_column_names, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_GENERATE_COLUMN_NAMES, spec); + + + auto parse_options = arrow::csv::ParseOptions::Defaults(); + + /** + * GArrowCSVReadOptions:delimiter: + * + * Field delimiter character. + * + * Since: 0.12.0 + */ + spec = g_param_spec_char("delimiter", + "Delimiter", + "Field delimiter character", + 0, + G_MAXINT8, + parse_options.delimiter, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_DELIMITER, spec); + + /** + * GArrowCSVReadOptions:is-quoted: + * + * Whether quoting is used. + * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("is-quoted", + "Is quoted", + "Whether quoting is used", + parse_options.quoting, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_IS_QUOTED, spec); + + /** + * GArrowCSVReadOptions:quote-character: + * + * Quoting character. This is used only when + * #GArrowCSVReadOptions:is-quoted is %TRUE. + * + * Since: 0.12.0 + */ + spec = g_param_spec_char("quote-character", + "Quote character", + "Quoting character", + 0, + G_MAXINT8, + parse_options.quote_char, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_QUOTE_CHARACTER, spec); + + /** + * GArrowCSVReadOptions:is-double-quoted: + * + * Whether a quote inside a value is double quoted. + * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("is-double-quoted", + "Is double quoted", + "Whether a quote inside a value is double quoted", + parse_options.double_quote, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_IS_DOUBLE_QUOTED, spec); + + /** + * GArrowCSVReadOptions:is-escaped: + * + * Whether escaping is used. 
+ * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("is-escaped", + "Is escaped", + "Whether escaping is used", + parse_options.escaping, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_IS_ESCAPED, spec); + + /** + * GArrowCSVReadOptions:escape-character: + * + * Escaping character. This is used only when + * #GArrowCSVReadOptions:is-escaped is %TRUE. + * + * Since: 0.12.0 + */ + spec = g_param_spec_char("escape-character", + "Escape character", + "Escaping character", + 0, + G_MAXINT8, + parse_options.escape_char, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ESCAPE_CHARACTER, spec); + + /** + * GArrowCSVReadOptions:allow-newlines-in-values: + * + * Whether values are allowed to contain CR (0x0d) and LF (0x0a) characters. + * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("allow-newlines-in-values", + "Allow newlines in values", + "Whether values are allowed to contain " + "CR (0x0d) and LF (0x0a) characters.", + parse_options.newlines_in_values, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ALLOW_NEWLINES_IN_VALUES, + spec); + + /** + * GArrowCSVReadOptions:ignore-empty-lines: + * + * Whether empty lines are ignored. If %FALSE, an empty line + * represents a simple empty value (assuming a one-column CSV file). + * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("ignore-empty-lines", + "Ignore empty lines", + "Whether empty lines are ignored. " + "If FALSE, an empty line represents " + "a simple empty value " + "(assuming a one-column CSV file).", + parse_options.ignore_empty_lines, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_IGNORE_EMPTY_LINES, + spec); + + auto convert_options = arrow::csv::ConvertOptions::Defaults(); + + /** + * GArrowCSVReadOptions:check-utf8: + * + * Whether to check UTF8 validity of string columns. 
+ * + * Since: 0.12.0 + */ + spec = g_param_spec_boolean("check-utf8", + "Check UTF8", + "Whether to check UTF8 validity of string columns", + convert_options.check_utf8, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_CHECK_UTF8, spec); + + /** + * GArrowCSVReadOptions:allow-null-strings: + * + * Whether string / binary columns can have null values. + * If %TRUE, then strings in "null_values" are considered null for string columns. + * If %FALSE, then all strings are valid string values. + * + * Since: 0.14.0 + */ + spec = g_param_spec_boolean("allow-null-strings", + "Allow null strings", + "Whether string / binary columns can have null values. " + "If TRUE, then strings in null_values are considered null for string columns. " + "If FALSE, then all strings are valid string values.", + convert_options.strings_can_be_null, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ALLOW_NULL_STRINGS, spec); +} + +/** + * garrow_csv_read_options_new: + * + * Returns: A newly created #GArrowCSVReadOptions. + * + * Since: 0.12.0 + */ +GArrowCSVReadOptions * +garrow_csv_read_options_new(void) +{ + auto csv_read_options = g_object_new(GARROW_TYPE_CSV_READ_OPTIONS, NULL); + return GARROW_CSV_READ_OPTIONS(csv_read_options); +} + +/** + * garrow_csv_read_options_add_column_type: + * @options: A #GArrowCSVReadOptions. + * @name: The name of the target column. + * @data_type: The #GArrowDataType for the column. + * + * Add value type of a column. + * + * Since: 0.12.0 + */ +void +garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options, + const gchar *name, + GArrowDataType *data_type) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + priv->convert_options.column_types[name] = arrow_data_type; +} + +/** + * garrow_csv_read_options_add_schema: + * @options: A #GArrowCSVReadOptions. 
+ * @schema: The #GArrowSchema that specifies columns and their types. + * + * Add value types for columns in the schema. + * + * Since: 0.12.0 + */ +void +garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options, + GArrowSchema *schema) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + auto arrow_schema = garrow_schema_get_raw(schema); + for (const auto &field : arrow_schema->fields()) { + priv->convert_options.column_types[field->name()] = field->type(); + } +} + +/** + * garrow_csv_read_options_get_column_types: + * @options: A #GArrowCSVReadOptions. + * + * Returns: (transfer full) (element-type gchar* GArrowDataType): + * The column name and value type mapping of the options. + * + * Since: 0.12.0 + */ +GHashTable * +garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + GHashTable *types = g_hash_table_new_full(g_str_hash, + g_str_equal, + g_free, + g_object_unref); + for (const auto &iter : priv->convert_options.column_types) { + auto arrow_name = iter.first; + auto arrow_data_type = iter.second; + g_hash_table_insert(types, + g_strdup(arrow_name.c_str()), + garrow_data_type_new_raw(&arrow_data_type)); + } + return types; +} + +/** + * garrow_csv_read_options_set_null_values: + * @options: A #GArrowCSVReadOptions. + * @null_values: (array length=n_null_values): + * The values to be processed as null. + * @n_null_values: The number of the specified null values. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_set_null_values(GArrowCSVReadOptions *options, + const gchar **null_values, + gsize n_null_values) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.null_values.resize(n_null_values); + for (gsize i = 0; i < n_null_values; ++i) { + priv->convert_options.null_values[i] = null_values[i]; + } +} + +/** + * garrow_csv_read_options_get_null_values: + * @options: A #GArrowCSVReadOptions. 
+ * + * Returns: (nullable) (array zero-terminated=1) (element-type utf8) (transfer full): + * The values to be processed as null. + * + * If the number of values is zero, this returns %NULL. + * + * It's a %NULL-terminated string array. It must be freed with + * g_strfreev() when no longer needed. + * + * Since: 0.14.0 + */ +gchar ** +garrow_csv_read_options_get_null_values(GArrowCSVReadOptions *options) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + const auto &arrow_null_values = priv->convert_options.null_values; + if (arrow_null_values.empty()) { + return NULL; + } else { + auto n = arrow_null_values.size(); + gchar **null_values = g_new(gchar *, n + 1); + for (size_t i = 0; i < n; ++i) { + null_values[i] = g_strdup(arrow_null_values[i].c_str()); + } + null_values[n] = NULL; + return null_values; + } +} + +/** + * garrow_csv_read_options_add_null_value: + * @options: A #GArrowCSVReadOptions. + * @null_value: The value to be processed as null. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_add_null_value(GArrowCSVReadOptions *options, + const gchar *null_value) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.null_values.push_back(null_value); +} + +/** + * garrow_csv_read_options_set_true_values: + * @options: A #GArrowCSVReadOptions. + * @true_values: (array length=n_true_values): + * The values to be processed as true. + * @n_true_values: The number of the specified true values. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_set_true_values(GArrowCSVReadOptions *options, + const gchar **true_values, + gsize n_true_values) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.true_values.resize(n_true_values); + for (gsize i = 0; i < n_true_values; ++i) { + priv->convert_options.true_values[i] = true_values[i]; + } +} + +/** + * garrow_csv_read_options_get_true_values: + * @options: A #GArrowCSVReadOptions. 
+ * + * Returns: (nullable) (array zero-terminated=1) (element-type utf8) (transfer full): + * The values to be processed as true. + * + * If the number of values is zero, this returns %NULL. + * + * It's a %NULL-terminated string array. It must be freed with + * g_strfreev() when no longer needed. + * + * Since: 0.14.0 + */ +gchar ** +garrow_csv_read_options_get_true_values(GArrowCSVReadOptions *options) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + const auto &arrow_true_values = priv->convert_options.true_values; + if (arrow_true_values.empty()) { + return NULL; + } else { + auto n = arrow_true_values.size(); + gchar **true_values = g_new(gchar *, n + 1); + for (size_t i = 0; i < n; ++i) { + true_values[i] = g_strdup(arrow_true_values[i].c_str()); + } + true_values[n] = NULL; + return true_values; + } +} + +/** + * garrow_csv_read_options_add_true_value: + * @options: A #GArrowCSVReadOptions. + * @true_value: The value to be processed as true. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_add_true_value(GArrowCSVReadOptions *options, + const gchar *true_value) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.true_values.push_back(true_value); +} + +/** + * garrow_csv_read_options_set_false_values: + * @options: A #GArrowCSVReadOptions. + * @false_values: (array length=n_false_values): + * The values to be processed as false. + * @n_false_values: The number of the specified false values. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_set_false_values(GArrowCSVReadOptions *options, + const gchar **false_values, + gsize n_false_values) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.false_values.resize(n_false_values); + for (gsize i = 0; i < n_false_values; ++i) { + priv->convert_options.false_values[i] = false_values[i]; + } +} + +/** + * garrow_csv_read_options_get_false_values: + * @options: A #GArrowCSVReadOptions. 
+ * + * Returns: (nullable) (array zero-terminated=1) (element-type utf8) (transfer full): + * The values to be processed as false. + * + * If the number of values is zero, this returns %NULL. + * + * It's a %NULL-terminated string array. It must be freed with + * g_strfreev() when no longer needed. + * + * Since: 0.14.0 + */ +gchar ** +garrow_csv_read_options_get_false_values(GArrowCSVReadOptions *options) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + const auto &arrow_false_values = priv->convert_options.false_values; + if (arrow_false_values.empty()) { + return NULL; + } else { + auto n = arrow_false_values.size(); + gchar **false_values = g_new(gchar *, n + 1); + for (size_t i = 0; i < n; ++i) { + false_values[i] = g_strdup(arrow_false_values[i].c_str()); + } + false_values[n] = NULL; + return false_values; + } +} + +/** + * garrow_csv_read_options_add_false_value: + * @options: A #GArrowCSVReadOptions. + * @false_value: The value to be processed as false. + * + * Since: 0.14.0 + */ +void +garrow_csv_read_options_add_false_value(GArrowCSVReadOptions *options, + const gchar *false_value) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->convert_options.false_values.push_back(false_value); +} + +/** + * garrow_csv_read_options_set_column_names: + * @options: A #GArrowCSVReadOptions. + * @column_names: (array length=n_column_names): + * The column names (if empty, will be read from first + * row after `skip_rows`) + * @n_column_names: The number of the specified column names. 
+ * + * Since: 0.15.0 + */ +void +garrow_csv_read_options_set_column_names(GArrowCSVReadOptions *options, + const gchar **column_names, + gsize n_column_names) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->read_options.column_names.resize(n_column_names); + for (gsize i = 0; i < n_column_names; ++i) { + priv->read_options.column_names[i] = column_names[i]; + } +} + +/** + * garrow_csv_read_options_get_column_names: + * @options: A #GArrowCSVReadOptions. + * + * Returns: (nullable) (array zero-terminated=1) (element-type utf8) (transfer full): + * The column names. + * + * If the number of values is zero, this returns %NULL. + * + * It's a %NULL-terminated string array. It must be freed with + * g_strfreev() when no longer needed. + * + * Since: 0.15.0 + */ +gchar ** +garrow_csv_read_options_get_column_names(GArrowCSVReadOptions *options) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + const auto &arrow_column_names = priv->read_options.column_names; + if (arrow_column_names.empty()) { + return NULL; + } else { + auto n = arrow_column_names.size(); + gchar **column_names = g_new(gchar *, n + 1); + for (size_t i = 0; i < n; ++i) { + column_names[i] = g_strdup(arrow_column_names[i].c_str()); + } + column_names[n] = NULL; + return column_names; + } +} + +/** + * garrow_csv_read_options_add_column_names: + * @options: A #GArrowCSVReadOptions. + * @column_name: The column name to be added. 
+ * + * Since: 0.15.0 + */ +void +garrow_csv_read_options_add_column_name(GArrowCSVReadOptions *options, + const gchar *column_name) +{ + auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + priv->read_options.column_names.push_back(column_name); +} + +typedef struct GArrowCSVReaderPrivate_ { + std::shared_ptr<arrow::csv::TableReader> reader; + GArrowInputStream *input; +} GArrowCSVReaderPrivate; + +enum { + PROP_CSV_TABLE_READER = 1, + PROP_CSV_READER_INPUT, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCSVReader, + garrow_csv_reader, + G_TYPE_OBJECT) + +#define GARROW_CSV_READER_GET_PRIVATE(object) \ + static_cast<GArrowCSVReaderPrivate *>( \ + garrow_csv_reader_get_instance_private( \ + GARROW_CSV_READER(object))) + +static void +garrow_csv_reader_dispose(GObject *object) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(object); + + if (priv->input) { + g_object_unref(priv->input); + priv->input = nullptr; + } + + G_OBJECT_CLASS(garrow_csv_reader_parent_class)->dispose(object); +} + +static void +garrow_csv_reader_finalize(GObject *object) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(object); + + priv->reader.~shared_ptr(); + + G_OBJECT_CLASS(garrow_csv_reader_parent_class)->finalize(object); +} + +static void +garrow_csv_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CSV_TABLE_READER: + priv->reader = + *static_cast<std::shared_ptr<arrow::csv::TableReader> *>(g_value_get_pointer(value)); + break; + case PROP_CSV_READER_INPUT: + priv->input = GARROW_INPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_csv_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CSV_READER_INPUT: + 
g_value_set_object(value, priv->input); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_csv_reader_init(GArrowCSVReader *object) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(object); + new(&priv->reader) std::shared_ptr<arrow::csv::TableReader>; +} + +static void +garrow_csv_reader_class_init(GArrowCSVReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_csv_reader_dispose; + gobject_class->finalize = garrow_csv_reader_finalize; + gobject_class->set_property = garrow_csv_reader_set_property; + gobject_class->get_property = garrow_csv_reader_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("csv-table-reader", + "CSV table reader", + "The raw std::shared<arrow::csv::TableReader> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CSV_TABLE_READER, spec); + + spec = g_param_spec_object("input", + "Input", + "The input stream to be read", + GARROW_TYPE_INPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_CSV_READER_INPUT, + spec); +} + +/** + * garrow_csv_reader_new: + * @input: The input to be read. + * @options: (nullable): A #GArrowCSVReadOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowCSVReader or %NULL on error. 
+ * + * Since: 0.12.0 + */ +GArrowCSVReader * +garrow_csv_reader_new(GArrowInputStream *input, + GArrowCSVReadOptions *options, + GError **error) +{ + auto arrow_input = garrow_input_stream_get_raw(input); + arrow::csv::ReadOptions read_options; + arrow::csv::ParseOptions parse_options; + arrow::csv::ConvertOptions convert_options; + if (options) { + auto options_priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options); + read_options = options_priv->read_options; + parse_options = options_priv->parse_options; + convert_options = options_priv->convert_options; + } else { + read_options = arrow::csv::ReadOptions::Defaults(); + parse_options = arrow::csv::ParseOptions::Defaults(); + convert_options = arrow::csv::ConvertOptions::Defaults(); + } + + auto arrow_reader = + arrow::csv::TableReader::Make(arrow::io::default_io_context(), + arrow_input, + read_options, + parse_options, + convert_options); + if (garrow::check(error, arrow_reader, "[csv-reader][new]")) { + return garrow_csv_reader_new_raw(&(*arrow_reader), input); + } else { + return NULL; + } +} + +/** + * garrow_csv_reader_read: + * @reader: A #GArrowCSVReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A read #GArrowTable or %NULL on error. 
+ * + * Since: 0.12.0 + */ +GArrowTable * +garrow_csv_reader_read(GArrowCSVReader *reader, + GError **error) +{ + auto arrow_reader = garrow_csv_reader_get_raw(reader); + auto arrow_table = arrow_reader->Read(); + if (garrow::check(error, arrow_table, "[csv-reader][read]")) { + return garrow_table_new_raw(&(arrow_table.ValueOrDie())); + } else { + return NULL; + } +} + + +typedef struct GArrowJSONReadOptionsPrivate_ { + arrow::json::ReadOptions read_options; + arrow::json::ParseOptions parse_options; + GArrowSchema *schema; +} GArrowJSONReadOptionsPrivate; + +enum { + PROP_JSON_READ_OPTIONS_USE_THREADS = 1, + PROP_JSON_READ_OPTIONS_BLOCK_SIZE, + PROP_JSON_READ_OPTIONS_ALLOW_NEWLINES_IN_VALUES, + PROP_JSON_READ_OPTIONS_UNEXPECTED_FIELD_BEHAVIOR, + PROP_JSON_READ_OPTIONS_SCHEMA, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowJSONReadOptions, + garrow_json_read_options, + G_TYPE_OBJECT) + +#define GARROW_JSON_READ_OPTIONS_GET_PRIVATE(object) \ + static_cast<GArrowJSONReadOptionsPrivate *>( \ + garrow_json_read_options_get_instance_private( \ + GARROW_JSON_READ_OPTIONS(object))) + +static void +garrow_json_read_options_dispose(GObject *object) +{ + auto priv = GARROW_JSON_READ_OPTIONS_GET_PRIVATE(object); + + if (priv->schema) { + g_object_unref(priv->schema); + priv->schema = nullptr; + } + + G_OBJECT_CLASS(garrow_json_read_options_parent_class)->dispose(object); +} + +static void +garrow_json_read_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_JSON_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_JSON_READ_OPTIONS_USE_THREADS: + priv->read_options.use_threads = g_value_get_boolean(value); + break; + case PROP_JSON_READ_OPTIONS_BLOCK_SIZE: + priv->read_options.block_size = g_value_get_int(value); + break; + case PROP_JSON_READ_OPTIONS_ALLOW_NEWLINES_IN_VALUES: + priv->parse_options.newlines_in_values = g_value_get_boolean(value); + break; + case 
PROP_JSON_READ_OPTIONS_UNEXPECTED_FIELD_BEHAVIOR: + priv->parse_options.unexpected_field_behavior = + static_cast<arrow::json::UnexpectedFieldBehavior>(g_value_get_enum(value)); + break; + case PROP_JSON_READ_OPTIONS_SCHEMA: + { + auto schema = g_value_dup_object(value); + if (priv->schema) { + g_object_unref(priv->schema); + } + if (schema) { + priv->schema = GARROW_SCHEMA(schema); + priv->parse_options.explicit_schema = garrow_schema_get_raw(priv->schema); + } else { + priv->schema = NULL; + priv->parse_options.explicit_schema = nullptr; + } + break; + } + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_json_read_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_JSON_READ_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_JSON_READ_OPTIONS_USE_THREADS: + g_value_set_boolean(value, priv->read_options.use_threads); + break; + case PROP_JSON_READ_OPTIONS_BLOCK_SIZE: + g_value_set_int(value, priv->read_options.block_size); + break; + case PROP_JSON_READ_OPTIONS_ALLOW_NEWLINES_IN_VALUES: + g_value_set_boolean(value, priv->parse_options.newlines_in_values); + break; + case PROP_JSON_READ_OPTIONS_UNEXPECTED_FIELD_BEHAVIOR: + g_value_set_enum(value, static_cast<int>(priv->parse_options.unexpected_field_behavior)); + break; + case PROP_JSON_READ_OPTIONS_SCHEMA: + g_value_set_object(value, priv->schema); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_json_read_options_init(GArrowJSONReadOptions *object) +{ + auto priv = GARROW_JSON_READ_OPTIONS_GET_PRIVATE(object); + priv->read_options = arrow::json::ReadOptions::Defaults(); + priv->parse_options = arrow::json::ParseOptions::Defaults(); +} + +static void +garrow_json_read_options_class_init(GArrowJSONReadOptionsClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + 
gobject_class->dispose = garrow_json_read_options_dispose; + gobject_class->set_property = garrow_json_read_options_set_property; + gobject_class->get_property = garrow_json_read_options_get_property; + + auto read_options = arrow::json::ReadOptions::Defaults(); + + /** + * GArrowJSONReadOptions:use-threads: + * + * Whether to use the global CPU thread pool. + * + * Since: 0.14.0 + */ + spec = g_param_spec_boolean("use-threads", + "Use threads", + "Whether to use the global CPU thread pool", + read_options.use_threads, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JSON_READ_OPTIONS_USE_THREADS, + spec); + + /** + * GArrowJSONReadOptions:block-size: + * + * Block size we request from the IO layer; also determines the size + * of chunks when #GArrowJSONReadOptions:use-threads is %TRUE. + * + * Since: 0.14.0 + */ + spec = g_param_spec_int("block-size", + "Block size", + "Block size we request from the IO layer; " + "also determines the size of chunks " + "when ::use-threads is TRUE", + 0, + G_MAXINT, + read_options.block_size, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JSON_READ_OPTIONS_BLOCK_SIZE, + spec); + + + auto parse_options = arrow::json::ParseOptions::Defaults(); + + /** + * GArrowJSONReadOptions:allow-newlines-in-values: + * + * Whether objects may be printed across multiple lines (for example pretty printed). + * if %FALSE, input must end with an empty line. + * + * Since: 0.14.0 + */ + spec = g_param_spec_boolean("allow-newlines-in-values", + "Allow newlines in values", + "Whether objects may be printed across multiple lines " + "(for example pretty printed). 
" + "if FALSE, input must end with an empty line.", + parse_options.newlines_in_values, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JSON_READ_OPTIONS_ALLOW_NEWLINES_IN_VALUES, + spec); + + /** + * GArrowJSONReadOptions:unexpected-field-behavior: + * + * How to parse handle fields outside the explicit schema. + * + * Since: 0.14.0 + */ + spec = g_param_spec_enum("unexpected-field-behavior", + "UnexpectedFieldBehavior", + "How to parse handle fields outside the explicit schema.", + GARROW_TYPE_JSON_READ_UNEXPECTED_FIELD_BEHAVIOR, + GARROW_JSON_READ_INFER_TYPE, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JSON_READ_OPTIONS_UNEXPECTED_FIELD_BEHAVIOR, + spec); + + /** + * GArrowJSONReadOptions:schema: + * + * Schema for passing custom conversion rules. + * + * Since: 0.14.0 + */ + spec = g_param_spec_object("schema", + "Schema", + "Schema for passing custom conversion rules.", + GARROW_TYPE_SCHEMA, + static_cast<GParamFlags>(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JSON_READ_OPTIONS_SCHEMA, + spec); +} + +/** + * garrow_json_read_options_new: + * + * Returns: A newly created #GArrowJSONReadOptions. 
+ * + * Since: 0.14.0 + */ +GArrowJSONReadOptions * +garrow_json_read_options_new(void) +{ + auto json_read_options = g_object_new(GARROW_TYPE_JSON_READ_OPTIONS, NULL); + return GARROW_JSON_READ_OPTIONS(json_read_options); +} + + +typedef struct GArrowJSONReaderPrivate_ { + std::shared_ptr<arrow::json::TableReader> reader; + GArrowInputStream *input; +} GArrowJSONReaderPrivate; + +enum { + PROP_JSON_TABLE_READER = 1, + PROP_JSON_READER_INPUT, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowJSONReader, + garrow_json_reader, + G_TYPE_OBJECT) + +#define GARROW_JSON_READER_GET_PRIVATE(object) \ + static_cast<GArrowJSONReaderPrivate *>( \ + garrow_json_reader_get_instance_private( \ + GARROW_JSON_READER(object))) + +static void +garrow_json_reader_dispose(GObject *object) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(object); + + if (priv->input) { + g_object_unref(priv->input); + priv->input = nullptr; + } + + G_OBJECT_CLASS(garrow_json_reader_parent_class)->dispose(object); +} + +static void +garrow_json_reader_finalize(GObject *object) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(object); + + priv->reader.~shared_ptr(); + + G_OBJECT_CLASS(garrow_json_reader_parent_class)->finalize(object); +} + +static void +garrow_json_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_JSON_TABLE_READER: + priv->reader = + *static_cast<std::shared_ptr<arrow::json::TableReader> *>(g_value_get_pointer(value)); + break; + case PROP_JSON_READER_INPUT: + priv->input = GARROW_INPUT_STREAM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_json_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_JSON_READER_INPUT: + 
g_value_set_object(value, priv->input); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_json_reader_init(GArrowJSONReader *object) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(object); + new(&priv->reader) std::shared_ptr<arrow::json::TableReader>; +} + +static void +garrow_json_reader_class_init(GArrowJSONReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_json_reader_dispose; + gobject_class->finalize = garrow_json_reader_finalize; + gobject_class->set_property = garrow_json_reader_set_property; + gobject_class->get_property = garrow_json_reader_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("json-table-reader", + "JSON table reader", + "The raw std::shared<arrow::json::TableReader> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_JSON_TABLE_READER, spec); + + spec = g_param_spec_object("input", + "Input", + "The input stream to be read", + GARROW_TYPE_INPUT_STREAM, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_JSON_READER_INPUT, + spec); +} + +/** + * garrow_json_reader_new: + * @input: The input to be read. + * @options: (nullable): A #GArrowJSONReadOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowJSONReader or %NULL on error. 
+ * + * Since: 0.14.0 + */ +GArrowJSONReader * +garrow_json_reader_new(GArrowInputStream *input, + GArrowJSONReadOptions *options, + GError **error) +{ + auto arrow_input = garrow_input_stream_get_raw(input); + arrow::Status status; + + arrow::Result<std::shared_ptr<arrow::json::TableReader>> arrow_reader; + if (options) { + auto options_priv = GARROW_JSON_READ_OPTIONS_GET_PRIVATE(options); + arrow_reader = arrow::json::TableReader::Make(arrow::default_memory_pool(), + arrow_input, + options_priv->read_options, + options_priv->parse_options); + } else { + arrow_reader = arrow::json::TableReader::Make(arrow::default_memory_pool(), + arrow_input, + arrow::json::ReadOptions::Defaults(), + arrow::json::ParseOptions::Defaults()); + } + + if (garrow::check(error, arrow_reader, "[json-reader][new]")) { + return garrow_json_reader_new_raw(&*arrow_reader, input); + } else { + return NULL; + } +} + +/** + * garrow_json_reader_read: + * @reader: A #GArrowJSONReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A read #GArrowTable or %NULL on error. 
+ * + * Since: 0.14.0 + */ +GArrowTable * +garrow_json_reader_read(GArrowJSONReader *reader, + GError **error) +{ + auto arrow_reader = garrow_json_reader_get_raw(reader); + auto arrow_table = arrow_reader->Read(); + if (garrow::check(error, arrow_table, "[json-reader][read]")) { + return garrow_table_new_raw(&(arrow_table.ValueOrDie())); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowRecordBatchReader * +garrow_record_batch_reader_new_raw( + std::shared_ptr<arrow::RecordBatchReader> *arrow_reader) +{ + return GARROW_RECORD_BATCH_READER( + g_object_new(GARROW_TYPE_RECORD_BATCH_READER, + "record-batch-reader", arrow_reader, + NULL)); +} + +std::shared_ptr<arrow::ipc::RecordBatchReader> +garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader) +{ + auto priv = GARROW_RECORD_BATCH_READER_GET_PRIVATE(reader); + return priv->record_batch_reader; +} + +GArrowTableBatchReader * +garrow_table_batch_reader_new_raw(std::shared_ptr<arrow::TableBatchReader> *arrow_reader) +{ + auto reader = + GARROW_TABLE_BATCH_READER(g_object_new(GARROW_TYPE_TABLE_BATCH_READER, + "record-batch-reader", arrow_reader, + NULL)); + return reader; +} + +GArrowRecordBatchStreamReader * +garrow_record_batch_stream_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchStreamReader> *arrow_reader) +{ + auto reader = + GARROW_RECORD_BATCH_STREAM_READER( + g_object_new(GARROW_TYPE_RECORD_BATCH_STREAM_READER, + "record-batch-reader", arrow_reader, + NULL)); + return reader; +} + +GArrowRecordBatchFileReader * +garrow_record_batch_file_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchFileReader> *arrow_reader) +{ + auto reader = + GARROW_RECORD_BATCH_FILE_READER( + g_object_new(GARROW_TYPE_RECORD_BATCH_FILE_READER, + "record-batch-file-reader", arrow_reader, + NULL)); + return reader; +} + +std::shared_ptr<arrow::ipc::RecordBatchFileReader> +garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader) +{ + auto priv = 
GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(reader); + return priv->record_batch_file_reader; +} + +GArrowFeatherFileReader * +garrow_feather_file_reader_new_raw(std::shared_ptr<arrow::ipc::feather::Reader> *arrow_reader) +{ + auto reader = + GARROW_FEATHER_FILE_READER( + g_object_new(GARROW_TYPE_FEATHER_FILE_READER, + "feather-reader", arrow_reader, + NULL)); + return reader; +} + +std::shared_ptr<arrow::ipc::feather::Reader> +garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader) +{ + auto priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(reader); + return priv->feather_reader; +} + +GArrowCSVReader * +garrow_csv_reader_new_raw(std::shared_ptr<arrow::csv::TableReader> *arrow_reader, + GArrowInputStream *input) +{ + auto reader = GARROW_CSV_READER(g_object_new(GARROW_TYPE_CSV_READER, + "csv-table-reader", arrow_reader, + "input", input, + NULL)); + return reader; +} + +std::shared_ptr<arrow::csv::TableReader> +garrow_csv_reader_get_raw(GArrowCSVReader *reader) +{ + auto priv = GARROW_CSV_READER_GET_PRIVATE(reader); + return priv->reader; +} + +GArrowJSONReader * +garrow_json_reader_new_raw(std::shared_ptr<arrow::json::TableReader> *arrow_reader, + GArrowInputStream *input) +{ + auto reader = GARROW_JSON_READER(g_object_new(GARROW_TYPE_JSON_READER, + "json-table-reader", arrow_reader, + "input", input, + NULL)); + return reader; +} + +std::shared_ptr<arrow::json::TableReader> +garrow_json_reader_get_raw(GArrowJSONReader *reader) +{ + auto priv = GARROW_JSON_READER_GET_PRIVATE(reader); + return priv->reader; +} diff --git a/src/arrow/c_glib/arrow-glib/reader.h b/src/arrow/c_glib/arrow-glib/reader.h new file mode 100644 index 000000000..1bdd322e6 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/reader.h @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/gobject-type.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/schema.h> +#include <arrow-glib/table.h> + +#include <arrow-glib/input-stream.h> + +#include <arrow-glib/metadata-version.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_RECORD_BATCH_READER (garrow_record_batch_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchReader, + garrow_record_batch_reader, + GARROW, + RECORD_BATCH_READER, + GObject) +struct _GArrowRecordBatchReaderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatchReader * +garrow_record_batch_reader_import(gpointer c_abi_array_stream, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatchReader * +garrow_record_batch_reader_new(GList *record_batches, + GArrowSchema *schema, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +gpointer +garrow_record_batch_reader_export(GArrowRecordBatchReader *reader, + GError **error); + +GArrowSchema *garrow_record_batch_reader_get_schema( + GArrowRecordBatchReader *reader); +#ifndef GARROW_DISABLE_DEPRECATED +G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) +GArrowRecordBatch *garrow_record_batch_reader_get_next_record_batch( + GArrowRecordBatchReader *reader, + GError **error); +#endif +#ifndef GARROW_DISABLE_DEPRECATED +G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) +GArrowRecordBatch 
*garrow_record_batch_reader_read_next_record_batch( + GArrowRecordBatchReader *reader, + GError **error); +#endif +GArrowRecordBatch *garrow_record_batch_reader_read_next( + GArrowRecordBatchReader *reader, + GError **error); +GARROW_AVAILABLE_IN_6_0 +GArrowTable * +garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, + GError **error); + +#define GARROW_TYPE_TABLE_BATCH_READER (garrow_table_batch_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader, + garrow_table_batch_reader, + GARROW, + TABLE_BATCH_READER, + GArrowRecordBatchReader) +struct _GArrowTableBatchReaderClass +{ + GArrowRecordBatchReaderClass parent_class; +}; + +GArrowTableBatchReader *garrow_table_batch_reader_new(GArrowTable *table); + + +#define GARROW_TYPE_RECORD_BATCH_STREAM_READER \ + (garrow_record_batch_stream_reader_get_type()) +#define GARROW_RECORD_BATCH_STREAM_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_READER, \ + GArrowRecordBatchStreamReader)) +#define GARROW_RECORD_BATCH_STREAM_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_RECORD_BATCH_STREAM_READER, \ + GArrowRecordBatchStreamReaderClass)) +#define GARROW_IS_RECORD_BATCH_STREAM_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_READER)) +#define GARROW_IS_RECORD_BATCH_STREAM_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_RECORD_BATCH_STREAM_READER)) +#define GARROW_RECORD_BATCH_STREAM_READER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_READER, \ + GArrowRecordBatchStreamReaderClass)) + +typedef struct _GArrowRecordBatchStreamReader GArrowRecordBatchStreamReader; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowRecordBatchStreamReaderClass GArrowRecordBatchStreamReaderClass; +#endif + +/** + * GArrowRecordBatchStreamReader: + * + * It wraps `arrow::ipc::RecordBatchStreamReader`. 
+ */ +struct _GArrowRecordBatchStreamReader +{ + /*< private >*/ + GArrowRecordBatchReader parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowRecordBatchStreamReaderClass +{ + GArrowRecordBatchReaderClass parent_class; +}; +#endif + +GType garrow_record_batch_stream_reader_get_type(void) G_GNUC_CONST; + +GArrowRecordBatchStreamReader *garrow_record_batch_stream_reader_new( + GArrowInputStream *stream, + GError **error); + + +#define GARROW_TYPE_RECORD_BATCH_FILE_READER \ + (garrow_record_batch_file_reader_get_type()) +#define GARROW_RECORD_BATCH_FILE_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_READER, \ + GArrowRecordBatchFileReader)) +#define GARROW_RECORD_BATCH_FILE_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_RECORD_BATCH_FILE_READER, \ + GArrowRecordBatchFileReaderClass)) +#define GARROW_IS_RECORD_BATCH_FILE_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_READER)) +#define GARROW_IS_RECORD_BATCH_FILE_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_RECORD_BATCH_FILE_READER)) +#define GARROW_RECORD_BATCH_FILE_READER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_READER, \ + GArrowRecordBatchFileReaderClass)) + +typedef struct _GArrowRecordBatchFileReader GArrowRecordBatchFileReader; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowRecordBatchFileReaderClass GArrowRecordBatchFileReaderClass; +#endif + +/** + * GArrowRecordBatchFileReader: + * + * It wraps `arrow::ipc::RecordBatchFileReader`. 
+ */ +struct _GArrowRecordBatchFileReader +{ + /*< private >*/ + GObject parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowRecordBatchFileReaderClass +{ + GObjectClass parent_class; +}; +#endif + +GType garrow_record_batch_file_reader_get_type(void) G_GNUC_CONST; + +GArrowRecordBatchFileReader *garrow_record_batch_file_reader_new( + GArrowSeekableInputStream *file, + GError **error); + +GArrowSchema *garrow_record_batch_file_reader_get_schema( + GArrowRecordBatchFileReader *reader); +guint garrow_record_batch_file_reader_get_n_record_batches( + GArrowRecordBatchFileReader *reader); +GArrowMetadataVersion garrow_record_batch_file_reader_get_version( + GArrowRecordBatchFileReader *reader); +#ifndef GARROW_DISABLE_DEPRECATED +G_GNUC_DEPRECATED_FOR(garrow_record_batch_file_reader_read_record_batch) +GArrowRecordBatch *garrow_record_batch_file_reader_get_record_batch( + GArrowRecordBatchFileReader *reader, + guint i, + GError **error); +#endif +GArrowRecordBatch *garrow_record_batch_file_reader_read_record_batch( + GArrowRecordBatchFileReader *reader, + guint i, + GError **error); + + +#define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader, + garrow_feather_file_reader, + GARROW, + FEATHER_FILE_READER, + GObject) +struct _GArrowFeatherFileReaderClass +{ + GObjectClass parent_class; +}; + +GArrowFeatherFileReader *garrow_feather_file_reader_new( + GArrowSeekableInputStream *file, + GError **error); + +gint garrow_feather_file_reader_get_version( + GArrowFeatherFileReader *reader); +GArrowTable * +garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, + GError **error); +GArrowTable * +garrow_feather_file_reader_read_indices(GArrowFeatherFileReader *reader, + const gint *indices, + guint n_indices, + GError **error); +GArrowTable * +garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, + const gchar **names, + guint n_names, + GError **error); + +#define 
GARROW_TYPE_CSV_READ_OPTIONS (garrow_csv_read_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCSVReadOptions, + garrow_csv_read_options, + GARROW, + CSV_READ_OPTIONS, + GObject) +struct _GArrowCSVReadOptionsClass +{ + GObjectClass parent_class; +}; + +GArrowCSVReadOptions *garrow_csv_read_options_new(void); +void +garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options, + const gchar *name, + GArrowDataType *data_type); +void +garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options, + GArrowSchema *schema); +GHashTable * +garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_set_null_values(GArrowCSVReadOptions *options, + const gchar **null_values, + gsize n_null_values); +GARROW_AVAILABLE_IN_0_14 +gchar ** +garrow_csv_read_options_get_null_values(GArrowCSVReadOptions *options); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_add_null_value(GArrowCSVReadOptions *options, + const gchar *null_value); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_set_true_values(GArrowCSVReadOptions *options, + const gchar **true_values, + gsize n_true_values); +GARROW_AVAILABLE_IN_0_14 +gchar ** +garrow_csv_read_options_get_true_values(GArrowCSVReadOptions *options); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_add_true_value(GArrowCSVReadOptions *options, + const gchar *true_value); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_set_false_values(GArrowCSVReadOptions *options, + const gchar **false_values, + gsize n_false_values); +GARROW_AVAILABLE_IN_0_14 +gchar ** +garrow_csv_read_options_get_false_values(GArrowCSVReadOptions *options); +GARROW_AVAILABLE_IN_0_14 +void +garrow_csv_read_options_add_false_value(GArrowCSVReadOptions *options, + const gchar *false_value); +GARROW_AVAILABLE_IN_0_15 +void +garrow_csv_read_options_set_column_names(GArrowCSVReadOptions *options, + const gchar **column_names, + gsize n_column_names); 
+GARROW_AVAILABLE_IN_0_15 +gchar ** +garrow_csv_read_options_get_column_names(GArrowCSVReadOptions *options); +GARROW_AVAILABLE_IN_0_15 +void +garrow_csv_read_options_add_column_name(GArrowCSVReadOptions *options, + const gchar *column_name); + +#define GARROW_TYPE_CSV_READER (garrow_csv_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowCSVReader, + garrow_csv_reader, + GARROW, + CSV_READER, + GObject) +struct _GArrowCSVReaderClass +{ + GObjectClass parent_class; +}; + +GArrowCSVReader *garrow_csv_reader_new(GArrowInputStream *input, + GArrowCSVReadOptions *options, + GError **error); +GArrowTable *garrow_csv_reader_read(GArrowCSVReader *reader, + GError **error); + + +/** + * GArrowJSONReadUnexpectedFieldBehavior: + * @GARROW_JSON_READ_IGNORE: Ignore other fields. + * @GARROW_JSON_READ_ERROR: Return error. + * @GARROW_JSON_READ_INFER_TYPE: Infer a type. + * + * They are corresponding to `arrow::json::UnexpectedFieldBehavior` values. + */ +typedef enum { + GARROW_JSON_READ_IGNORE, + GARROW_JSON_READ_ERROR, + GARROW_JSON_READ_INFER_TYPE, +} GArrowJSONReadUnexpectedFieldBehavior; + +#define GARROW_TYPE_JSON_READ_OPTIONS (garrow_json_read_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowJSONReadOptions, + garrow_json_read_options, + GARROW, + JSON_READ_OPTIONS, + GObject) +struct _GArrowJSONReadOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_14 +GArrowJSONReadOptions *garrow_json_read_options_new(void); + +#define GARROW_TYPE_JSON_READER (garrow_json_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowJSONReader, + garrow_json_reader, + GARROW, + JSON_READER, + GObject) +struct _GArrowJSONReaderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_14 +GArrowJSONReader *garrow_json_reader_new(GArrowInputStream *input, + GArrowJSONReadOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_14 +GArrowTable *garrow_json_reader_read(GArrowJSONReader *reader, + GError **error); + +G_END_DECLS diff --git 
a/src/arrow/c_glib/arrow-glib/reader.hpp b/src/arrow/c_glib/arrow-glib/reader.hpp new file mode 100644 index 000000000..c7b2b76f2 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/reader.hpp @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> +#include <arrow/csv/api.h> +#include <arrow/ipc/api.h> +#include <arrow/ipc/feather.h> +#include <arrow/json/api.h> + +#include <arrow-glib/reader.h> + +GArrowRecordBatchReader *garrow_record_batch_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchReader> *arrow_reader); +std::shared_ptr<arrow::ipc::RecordBatchReader> garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader); + +GArrowTableBatchReader *garrow_table_batch_reader_new_raw(std::shared_ptr<arrow::TableBatchReader> *arrow_reader); + +GArrowRecordBatchStreamReader *garrow_record_batch_stream_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchStreamReader> *arrow_reader); + +GArrowRecordBatchFileReader * +garrow_record_batch_file_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchFileReader> *arrow_reader); +std::shared_ptr<arrow::ipc::RecordBatchFileReader> garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader); + +GArrowFeatherFileReader * +garrow_feather_file_reader_new_raw(std::shared_ptr<arrow::ipc::feather::Reader> *arrow_reader); +std::shared_ptr<arrow::ipc::feather::Reader> +garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader); + +GArrowCSVReader * +garrow_csv_reader_new_raw(std::shared_ptr<arrow::csv::TableReader> *arrow_reader, + GArrowInputStream *input); +std::shared_ptr<arrow::csv::TableReader> +garrow_csv_reader_get_raw(GArrowCSVReader *reader); + +GArrowJSONReader * +garrow_json_reader_new_raw(std::shared_ptr<arrow::json::TableReader> *arrow_reader, + GArrowInputStream *input); +std::shared_ptr<arrow::json::TableReader> +garrow_json_reader_get_raw(GArrowJSONReader *reader); diff --git a/src/arrow/c_glib/arrow-glib/record-batch.cpp b/src/arrow/c_glib/arrow-glib/record-batch.cpp new file mode 100644 index 000000000..0903671e8 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/record-batch.cpp @@ -0,0 +1,726 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/field.hpp> +#include <arrow-glib/internal-index.hpp> +#include <arrow-glib/ipc-options.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <arrow/c/bridge.h> +#include <arrow/util/iterator.h> + +#include <sstream> + +G_BEGIN_DECLS + +/** + * SECTION: record-batch + * @section_id: record-batch + * @title: Record batch related classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowRecordBatch is a class for record batch. Record batch is + * similar to #GArrowTable. Record batch also has zero or more + * columns and zero or more records. + * + * Record batch is used for shared memory IPC. + * + * #GArrowRecordBatchIterator is a class for iterating record + * batches. 
+ */ + +typedef struct GArrowRecordBatchPrivate_ { + std::shared_ptr<arrow::RecordBatch> record_batch; +} GArrowRecordBatchPrivate; + +enum { + PROP_RECORD_BATCH = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatch, + garrow_record_batch, + G_TYPE_OBJECT) + +#define GARROW_RECORD_BATCH_GET_PRIVATE(obj) \ + static_cast<GArrowRecordBatchPrivate *>( \ + garrow_record_batch_get_instance_private( \ + GARROW_RECORD_BATCH(obj))) + +static void +garrow_record_batch_finalize(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_GET_PRIVATE(object); + + priv->record_batch.~shared_ptr(); + + G_OBJECT_CLASS(garrow_record_batch_parent_class)->finalize(object); +} + +static void +garrow_record_batch_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RECORD_BATCH: + priv->record_batch = + *static_cast<std::shared_ptr<arrow::RecordBatch> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_init(GArrowRecordBatch *object) +{ + auto priv = GARROW_RECORD_BATCH_GET_PRIVATE(object); + new(&priv->record_batch) std::shared_ptr<arrow::RecordBatch>; +} + +static void +garrow_record_batch_class_init(GArrowRecordBatchClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_record_batch_finalize; + gobject_class->set_property = garrow_record_batch_set_property; + gobject_class->get_property = garrow_record_batch_get_property; + + spec = g_param_spec_pointer("record-batch", + "RecordBatch", + "The raw 
std::shared<arrow::RecordBatch> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH, spec); +} + +/** + * garrow_record_batch_import: + * @c_abi_array: (not nullable): A `struct ArrowArray *`. + * @schema: A #GArrowSchema of the C ABI array. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An imported #GArrowRecordBatch + * on success, %NULL on error. + * + * You don't need to release the passed `struct ArrowArray *`, + * even if this function reports an error. + * + * Since: 6.0.0 + */ +GArrowRecordBatch * +garrow_record_batch_import(gpointer c_abi_array, + GArrowSchema *schema, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_record_batch_result = + arrow::ImportRecordBatch(static_cast<ArrowArray *>(c_abi_array), + arrow_schema); + if (garrow::check(error, + arrow_record_batch_result, + "[record-batch][import]")) { + return garrow_record_batch_new_raw(&(*arrow_record_batch_result)); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_new: + * @schema: The schema of the record batch. + * @n_rows: The number of the rows in the record batch. + * @columns: (element-type GArrowArray): The columns in the record batch. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowRecordBatch or %NULL on error. 
+ */ +GArrowRecordBatch * +garrow_record_batch_new(GArrowSchema *schema, + guint32 n_rows, + GList *columns, + GError **error) +{ + const gchar *tag = "[record-batch][new]"; + + std::vector<std::shared_ptr<arrow::Array>> arrow_columns; + for (GList *node = columns; node; node = node->next) { + GArrowArray *column = GARROW_ARRAY(node->data); + arrow_columns.push_back(garrow_array_get_raw(column)); + } + + const auto &arrow_schema = garrow_schema_get_raw(schema); + if (arrow_schema->num_fields() != static_cast<int>(arrow_columns.size())) { + auto status = + arrow::Status::Invalid("Number of columns did not match schema"); + garrow_error_check(error, status, tag); + return NULL; + } + + auto arrow_record_batch = + arrow::RecordBatch::Make(arrow_schema, n_rows, arrow_columns); + auto status = arrow_record_batch->Validate(); + if (garrow_error_check(error, status, tag)) { + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_export: + * @record_batch: A #GArrowRecordBatch. + * @c_abi_array: (out): Return location for a `struct ArrowArray *`. + * It should be freed with the `ArrowArray::release` callback then + * g_free() when no longer needed. + * @c_abi_schema: (out) (nullable): Return location for a + * `struct ArrowSchema *` or %NULL. + * It should be freed with the `ArrowSchema::release` callback then + * g_free() when no longer needed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 6.0.0 + */ +gboolean +garrow_record_batch_export(GArrowRecordBatch *record_batch, + gpointer *c_abi_array, + gpointer *c_abi_schema, + GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + *c_abi_array = g_new(ArrowArray, 1); + arrow::Status status; + if (c_abi_schema) { + *c_abi_schema = g_new(ArrowSchema, 1); + status = arrow::ExportRecordBatch(*arrow_record_batch, + static_cast<ArrowArray *>(*c_abi_array), + static_cast<ArrowSchema *>(*c_abi_schema)); + } else { + status = arrow::ExportRecordBatch(*arrow_record_batch, + static_cast<ArrowArray *>(*c_abi_array)); + } + if (garrow::check(error, status, "[record-batch][export]")) { + return true; + } else { + g_free(*c_abi_array); + *c_abi_array = nullptr; + if (c_abi_schema) { + g_free(*c_abi_schema); + *c_abi_schema = nullptr; + } + return false; + } +} + +/** + * garrow_record_batch_equal: + * @record_batch: A #GArrowRecordBatch. + * @other_record_batch: A #GArrowRecordBatch to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_record_batch_equal(GArrowRecordBatch *record_batch, + GArrowRecordBatch *other_record_batch) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + const auto arrow_other_record_batch = + garrow_record_batch_get_raw(other_record_batch); + return arrow_record_batch->Equals(*arrow_other_record_batch); +} + +/** + * garrow_record_batch_equal_metadata: + * @record_batch: A #GArrowRecordBatch. + * @other_record_batch: A #GArrowRecordBatch to be compared. + * @check_metadata: Whether to compare metadata. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. 
+ * + * Since: 0.17.0 + */ +gboolean +garrow_record_batch_equal_metadata(GArrowRecordBatch *record_batch, + GArrowRecordBatch *other_record_batch, + gboolean check_metadata) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + const auto arrow_other_record_batch = garrow_record_batch_get_raw(other_record_batch); + return arrow_record_batch->Equals(*arrow_other_record_batch, check_metadata); +} + +/** + * garrow_record_batch_get_schema: + * @record_batch: A #GArrowRecordBatch. + * + * Returns: (transfer full): The schema of the record batch. + */ +GArrowSchema * +garrow_record_batch_get_schema(GArrowRecordBatch *record_batch) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_schema = arrow_record_batch->schema(); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * garrow_record_batch_get_column_data: + * @record_batch: A #GArrowRecordBatch. + * @i: The index of the target column. If it's negative, index is + * counted backward from the end of the columns. `-1` means the last + * column. + * + * Returns: (transfer full) (nullable): The i-th column in the record batch + * on success, %NULL on out of index. + * + * Since: 0.15.0 + */ +GArrowArray * +garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, + gint i) +{ + const auto &arrow_record_batch = garrow_record_batch_get_raw(record_batch); + if (!garrow_internal_index_adjust(i, arrow_record_batch->num_columns())) { + return NULL; + } + auto arrow_column = arrow_record_batch->column(i); + return garrow_array_new_raw(&arrow_column); +} + +/** + * garrow_record_batch_get_column_name: + * @record_batch: A #GArrowRecordBatch. + * @i: The index of the target column. If it's negative, index is + * counted backward from the end of the columns. `-1` means the last + * column. 
+ * + * Returns: (nullable): The name of the i-th column in the record batch + * on success, %NULL on out of index + */ +const gchar * +garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, + gint i) +{ + const auto &arrow_record_batch = garrow_record_batch_get_raw(record_batch); + if (!garrow_internal_index_adjust(i, arrow_record_batch->num_columns())) { + return NULL; + } + return arrow_record_batch->column_name(i).c_str(); +} + +/** + * garrow_record_batch_get_n_columns: + * @record_batch: A #GArrowRecordBatch. + * + * Returns: The number of columns in the record batch. + */ +guint +garrow_record_batch_get_n_columns(GArrowRecordBatch *record_batch) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + return arrow_record_batch->num_columns(); +} + +/** + * garrow_record_batch_get_n_rows: + * @record_batch: A #GArrowRecordBatch. + * + * Returns: The number of rows in the record batch. + */ +gint64 +garrow_record_batch_get_n_rows(GArrowRecordBatch *record_batch) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + return arrow_record_batch->num_rows(); +} + +/** + * garrow_record_batch_slice: + * @record_batch: A #GArrowRecordBatch. + * @offset: The offset of sub #GArrowRecordBatch. + * @length: The length of sub #GArrowRecordBatch. + * + * Returns: (transfer full): The sub #GArrowRecordBatch. It covers + * only from `offset` to `offset + length` range. The sub + * #GArrowRecordBatch shares values with the base + * #GArrowRecordBatch. + */ +GArrowRecordBatch * +garrow_record_batch_slice(GArrowRecordBatch *record_batch, + gint64 offset, + gint64 length) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_sub_record_batch = arrow_record_batch->Slice(offset, length); + return garrow_record_batch_new_raw(&arrow_sub_record_batch); +} + +/** + * garrow_record_batch_to_string: + * @record_batch: A #GArrowRecordBatch. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): + * The formatted record batch content or %NULL on error. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.4.0 + */ +gchar * +garrow_record_batch_to_string(GArrowRecordBatch *record_batch, GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + return g_strdup(arrow_record_batch->ToString().c_str()); +} + +/** + * garrow_record_batch_add_column: + * @record_batch: A #GArrowRecordBatch. + * @i: The index of the new column. + * @field: The field to be added. + * @column: The column to be added. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowRecordBatch that has a new column or %NULL on error. + * + * Since: 0.9.0 + */ +GArrowRecordBatch * +garrow_record_batch_add_column(GArrowRecordBatch *record_batch, + guint i, + GArrowField *field, + GArrowArray *column, + GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + const auto arrow_field = garrow_field_get_raw(field); + const auto arrow_column = garrow_array_get_raw(column); + auto arrow_new_record_batch = + arrow_record_batch->AddColumn(i, arrow_field, arrow_column); + if (garrow::check(error, + arrow_new_record_batch, + "[record-batch][add-column]")) { + return garrow_record_batch_new_raw(&(*arrow_new_record_batch)); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_remove_column: + * @record_batch: A #GArrowRecordBatch. + * @i: The index of the new column. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowRecordBatch that doesn't have the column or %NULL on error. 
+ * + * Since: 0.9.0 + */ +GArrowRecordBatch * +garrow_record_batch_remove_column(GArrowRecordBatch *record_batch, + guint i, + GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_new_record_batch = arrow_record_batch->RemoveColumn(i); + if (garrow::check(error, + arrow_new_record_batch, + "[record-batch][remove-column]")) { + return garrow_record_batch_new_raw(&(*arrow_new_record_batch)); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_serialize: + * @record_batch: A #GArrowRecordBatch. + * @options: (nullable): A #GArrowWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowBuffer that contains a serialized record batch or %NULL on + * error. + * + * Since: 1.0.0 + */ +GArrowBuffer * +garrow_record_batch_serialize(GArrowRecordBatch *record_batch, + GArrowWriteOptions *options, + GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + arrow::Result<std::shared_ptr<arrow::Buffer>> arrow_buffer; + if (options) { + auto arrow_options = garrow_write_options_get_raw(options); + auto arrow_buffer = arrow::ipc::SerializeRecordBatch(*arrow_record_batch, + *arrow_options); + if (garrow::check(error, arrow_buffer, "[record-batch][serialize]")) { + return garrow_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } + } else { + const auto arrow_options = arrow::ipc::IpcWriteOptions::Defaults(); + auto arrow_buffer = arrow::ipc::SerializeRecordBatch(*arrow_record_batch, + arrow_options); + if (garrow::check(error, arrow_buffer, "[record-batch][serialize]")) { + return garrow_buffer_new_raw(&(*arrow_buffer)); + } else { + return NULL; + } + } +} + + +typedef struct GArrowRecordBatchIteratorPrivate_ { + arrow::RecordBatchIterator iterator; +} GArrowRecordBatchIteratorPrivate; + +enum { + PROP_ITERATOR = 1, +}; + 
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchIterator, + garrow_record_batch_iterator, + G_TYPE_OBJECT) + +#define GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(obj) \ + static_cast<GArrowRecordBatchIteratorPrivate *>( \ + garrow_record_batch_iterator_get_instance_private( \ + GARROW_RECORD_BATCH_ITERATOR(obj))) + +static void +garrow_record_batch_iterator_finalize(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(object); + + priv->iterator.~Iterator(); + + G_OBJECT_CLASS(garrow_record_batch_iterator_parent_class)->finalize(object); +} + +static void +garrow_record_batch_iterator_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ITERATOR: + priv->iterator = + std::move(*static_cast<arrow::RecordBatchIterator *>(g_value_get_pointer(value))); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_iterator_init(GArrowRecordBatchIterator *object) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(object); + new(&priv->iterator) arrow::RecordBatchIterator; +} + +static void +garrow_record_batch_iterator_class_init(GArrowRecordBatchIteratorClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_record_batch_iterator_finalize; + gobject_class->set_property = garrow_record_batch_iterator_set_property; + + GParamSpec *spec; + + spec = g_param_spec_pointer("iterator", + "Iterator", + "The raw arrow::RecordBatchIterator", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ITERATOR, spec); +} + +/** + * garrow_record_batch_iterator_new: + * @record_batches: (element-type GArrowRecordBatch): + * The record batches. + * + * Returns: A newly created #GArrowRecordBatchIterator. 
+ * + * Since: 0.17.0 + */ +GArrowRecordBatchIterator * +garrow_record_batch_iterator_new(GList *record_batches) +{ + std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches; + for (auto node = record_batches; node; node = node->next) { + auto record_batch = GARROW_RECORD_BATCH(node->data); + arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch)); + } + + auto arrow_iterator = arrow::MakeVectorIterator(arrow_record_batches); + return garrow_record_batch_iterator_new_raw(&arrow_iterator); +} + +/** + * garrow_record_batch_iterator_next: + * @iterator: A #GArrowRecordBatchIterator. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * The next #GArrowRecordBatch, or %NULL when the iterator is completed. + * + * Since: 0.17.0 + */ +GArrowRecordBatch * +garrow_record_batch_iterator_next(GArrowRecordBatchIterator *iterator, + GError **error) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator); + + auto result = priv->iterator.Next(); + if (garrow::check(error, result, "[record-batch-iterator][next]")) { + auto arrow_record_batch = *result; + if (arrow_record_batch) { + return garrow_record_batch_new_raw(&arrow_record_batch); + } + } + return NULL; +} + +/** + * garrow_record_batch_iterator_equal: + * @iterator: A #GArrowRecordBatchIterator. + * @other_iterator: A #GArrowRecordBatchIterator to be compared. + * + * Returns: %TRUE if both iterators are the same, %FALSE otherwise. + * + * Since: 0.17.0 + */ +gboolean +garrow_record_batch_iterator_equal(GArrowRecordBatchIterator *iterator, + GArrowRecordBatchIterator *other_iterator) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator); + auto priv_other = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(other_iterator); + return priv->iterator.Equals(priv_other->iterator); +} + +/** + * garrow_record_batch_iterator_to_list: + * @iterator: A #GArrowRecordBatchIterator. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (element-type GArrowRecordBatch) (transfer full): + * A #GList contains every moved elements from the iterator. + * + * Since: 0.17.0 + */ +GList* +garrow_record_batch_iterator_to_list(GArrowRecordBatchIterator *iterator, + GError **error) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator); + GList *record_batches = NULL; + for (auto arrow_record_batch_result : priv->iterator) { + if (!garrow::check(error, + arrow_record_batch_result, + "[record-batch-iterator][to-list]")) { + g_list_free_full(record_batches, g_object_unref); + return NULL; + } + auto arrow_record_batch = *std::move(arrow_record_batch_result); + auto record_batch = garrow_record_batch_new_raw(&arrow_record_batch); + record_batches = g_list_prepend(record_batches, record_batch); + } + return g_list_reverse(record_batches); +} + +G_END_DECLS + +GArrowRecordBatch * +garrow_record_batch_new_raw(std::shared_ptr<arrow::RecordBatch> *arrow_record_batch) +{ + auto record_batch = + GARROW_RECORD_BATCH(g_object_new(GARROW_TYPE_RECORD_BATCH, + "record-batch", arrow_record_batch, + NULL)); + return record_batch; +} + +std::shared_ptr<arrow::RecordBatch> +garrow_record_batch_get_raw(GArrowRecordBatch *record_batch) +{ + auto priv = GARROW_RECORD_BATCH_GET_PRIVATE(record_batch); + return priv->record_batch; +} + +GArrowRecordBatchIterator * +garrow_record_batch_iterator_new_raw(arrow::RecordBatchIterator *arrow_iterator) +{ + auto iterator = g_object_new(GARROW_TYPE_RECORD_BATCH_ITERATOR, + "iterator", arrow_iterator, + NULL); + return GARROW_RECORD_BATCH_ITERATOR(iterator); +} + +arrow::RecordBatchIterator * +garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator) +{ + auto priv = GARROW_RECORD_BATCH_ITERATOR_GET_PRIVATE(iterator); + return &priv->iterator; +} diff --git a/src/arrow/c_glib/arrow-glib/record-batch.h b/src/arrow/c_glib/arrow-glib/record-batch.h new file mode 100644 index 
000000000..deca3c21b --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/record-batch.h @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.h> +#include <arrow-glib/ipc-options.h> +#include <arrow-glib/schema.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_RECORD_BATCH (garrow_record_batch_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatch, + garrow_record_batch, + GARROW, + RECORD_BATCH, + GObject) +struct _GArrowRecordBatchClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowRecordBatch * +garrow_record_batch_import(gpointer c_abi_array, + GArrowSchema *schema, + GError **error); + +GArrowRecordBatch *garrow_record_batch_new(GArrowSchema *schema, + guint32 n_rows, + GList *columns, + GError **error); + +GARROW_AVAILABLE_IN_6_0 +gboolean +garrow_record_batch_export(GArrowRecordBatch *record_batch, + gpointer *c_abi_array, + gpointer *c_abi_schema, + GError **error); + +gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch, + GArrowRecordBatch *other_record_batch); +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_record_batch_equal_metadata(GArrowRecordBatch *record_batch, + GArrowRecordBatch *other_record_batch, + 
gboolean check_metadata); + +GArrowSchema *garrow_record_batch_get_schema (GArrowRecordBatch *record_batch); +GARROW_AVAILABLE_IN_0_15 +GArrowArray *garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, + gint i); +const gchar *garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, + gint i); +guint garrow_record_batch_get_n_columns (GArrowRecordBatch *record_batch); +gint64 garrow_record_batch_get_n_rows (GArrowRecordBatch *record_batch); +GArrowRecordBatch *garrow_record_batch_slice (GArrowRecordBatch *record_batch, + gint64 offset, + gint64 length); + +gchar *garrow_record_batch_to_string (GArrowRecordBatch *record_batch, + GError **error); +GArrowRecordBatch *garrow_record_batch_add_column(GArrowRecordBatch *record_batch, + guint i, + GArrowField *field, + GArrowArray *column, + GError **error); +GArrowRecordBatch *garrow_record_batch_remove_column(GArrowRecordBatch *record_batch, + guint i, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowBuffer * +garrow_record_batch_serialize(GArrowRecordBatch *record_batch, + GArrowWriteOptions *options, + GError **error); + + +#define GARROW_TYPE_RECORD_BATCH_ITERATOR \ + (garrow_record_batch_iterator_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator, + garrow_record_batch_iterator, + GARROW, + RECORD_BATCH_ITERATOR, + GObject) +struct _GArrowRecordBatchIteratorClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowRecordBatchIterator * +garrow_record_batch_iterator_new(GList *record_batches); + +GARROW_AVAILABLE_IN_0_17 +GArrowRecordBatch * +garrow_record_batch_iterator_next(GArrowRecordBatchIterator *iterator, + GError **error); + +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_record_batch_iterator_equal(GArrowRecordBatchIterator *iterator, + GArrowRecordBatchIterator *other_iterator); + +GARROW_AVAILABLE_IN_0_17 +GList * +garrow_record_batch_iterator_to_list(GArrowRecordBatchIterator *iterator, + GError **error); + +G_END_DECLS diff --git 
a/src/arrow/c_glib/arrow-glib/record-batch.hpp b/src/arrow/c_glib/arrow-glib/record-batch.hpp new file mode 100644 index 000000000..506811000 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/record-batch.hpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/record-batch.h> + +GArrowRecordBatch *garrow_record_batch_new_raw(std::shared_ptr<arrow::RecordBatch> *arrow_record_batch); +std::shared_ptr<arrow::RecordBatch> garrow_record_batch_get_raw(GArrowRecordBatch *record_batch); + +GArrowRecordBatchIterator * +garrow_record_batch_iterator_new_raw(arrow::RecordBatchIterator *arrow_iterator); + +arrow::RecordBatchIterator * +garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator); diff --git a/src/arrow/c_glib/arrow-glib/scalar.cpp b/src/arrow/c_glib/arrow-glib/scalar.cpp new file mode 100644 index 000000000..847b48620 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/scalar.cpp @@ -0,0 +1,2405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/decimal.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/scalar.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: scalar + * @section_id: scalar-classes + * @title: Scalar classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowScalar is a base class for all scalar classes such as + * #GArrowBooleanScalar. + * + * #GArrowNullScalar is a class for a null scalar. + * + * #GArrowBooleanScalar is a class for a boolean scalar. + * + * #GArrowInt8Scalar is a class for a 8-bit integer scalar. + * + * #GArrowInt16Scalar is a class for a 16-bit integer scalar. + * + * #GArrowInt32Scalar is a class for a 32-bit integer scalar. + * + * #GArrowInt64Scalar is a class for a 64-bit integer scalar. + * + * #GArrowUInt8Scalar is a class for a 8-bit unsigned integer scalar. + * + * #GArrowUInt16Scalar is a class for a 16-bit unsigned integer scalar. + * + * #GArrowUInt32Scalar is a class for a 32-bit unsigned integer scalar. + * + * #GArrowUInt64Scalar is a class for a 64-bit unsigned integer scalar. + * + * #GArrowFloatScalar is a class for a 32-bit floating point scalar. + * + * #GArrowDoubleScalar is a class for a 64-bit floating point scalar. 
+ * + * #GArrowBaseBinaryScalar is a base class for all binary and string + * scalar classes such as #GArrowBinaryScalar. + * + * #GArrowBinaryScalar is a class for a binary scalar. + * + * #GArrowStringScalar is a class for a UTF-8 encoded string scalar. + * + * #GArrowLargeBinaryScalar is a class for a 64-bit offsets binary + * scalar. + * + * #GArrowLargeStringScalar is a class for a 64-bit offsets UTF-8 + * encoded string scalar. + * + * #GArrowFixedSizeBinaryScalar is a class for a fixed-size binary + * scalar. + * + * #GArrowDate32Scalar is a class for the number of days since UNIX + * epoch in a 32-bit signed integer scalar. + * + * #GArrowDate64Scalar is a class for the number of milliseconds + * since UNIX epoch in a 64-bit signed integer scalar. + * + * #GArrowTime32Scalar is a class for the number of seconds or + * milliseconds since midnight in a 32-bit signed integer scalar. + * + * #GArrowTime64Scalar is a class for the number of microseconds or + * nanoseconds since midnight in a 64-bit signed integer scalar. + * + * #GArrowTimestampScalar is a class for the number of + * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in + * a 64-bit signed integer scalar. + * + * #GArrowDecimal128Scalar is a class for a 128-bit decimal scalar. + * + * #GArrowDecimal256Scalar is a class for a 256-bit decimal scalar. + * + * #GArrowBaseListScalar is a base class for all list scalar classes + * such as #GArrowListScalar. + * + * #GArrowListScalar is a class for a list scalar. + * + * #GArrowLargeListScalar is a class for a large list scalar. + * + * #GArrowMapScalar is a class for a map scalar. + * + * #GArrowStructScalar is a class for a struct scalar. + * + * #GArrowUnionScalar is a base class for all union scalar classes + * such as #GArrowSparseUnionScalar. + * + * #GArrowSparseUnionScalar is a class for a sparse union scalar. + * + * #GArrowDenseUnionScalar is a class for a dense union scalar. 
+ * + * #GArrowExtensionScalar is a base class for user-defined extension + * scalar. + */ + +typedef struct GArrowScalarPrivate_ { + std::shared_ptr<arrow::Scalar> scalar; + GArrowDataType *data_type; +} GArrowScalarPrivate; + +enum { + PROP_SCALAR = 1, + PROP_DATA_TYPE, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowScalar, + garrow_scalar, + G_TYPE_OBJECT) + +#define GARROW_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowScalarPrivate *>( \ + garrow_scalar_get_instance_private( \ + GARROW_SCALAR(obj))) + +static void +garrow_scalar_dispose(GObject *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + if (priv->data_type) { + g_object_unref(priv->data_type); + priv->data_type = NULL; + } + + G_OBJECT_CLASS(garrow_scalar_parent_class)->dispose(object); +} + +static void +garrow_scalar_finalize(GObject *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + priv->scalar.~shared_ptr(); + + G_OBJECT_CLASS(garrow_scalar_parent_class)->finalize(object); +} + +static void +garrow_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCALAR: + priv->scalar = + *static_cast<std::shared_ptr<arrow::Scalar> *>(g_value_get_pointer(value)); + break; + case PROP_DATA_TYPE: + priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_scalar_init(GArrowScalar *object) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(object); + new(&priv->scalar) std::shared_ptr<arrow::Scalar>; +} + +static void +garrow_scalar_class_init(GArrowScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_scalar_dispose; + gobject_class->finalize = garrow_scalar_finalize; + gobject_class->set_property = garrow_scalar_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("scalar", 
+ "Scalar", + "The raw std::shared<arrow::Scalar> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCALAR, spec); + + /** + * GArrowScalar:data-type: + * + * The data type of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("data-type", + "Data type", + "The data type of the scalar", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); +} + +/** + * garrow_scalar_parse: + * @data_type: A #GArrowDataType for the parsed scalar. + * @data: (array length=size): Data to be parsed. + * @size: The number of bytes of the data. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created #GArrowScalar if the data is parsed successfully, + * %NULL otherwise. + * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_scalar_parse(GArrowDataType *data_type, + const guint8 *data, + gsize size, + GError **error) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_data = + arrow::util::string_view(reinterpret_cast<const char *>(data), + size); + auto arrow_scalar_result = arrow::Scalar::Parse(arrow_data_type, arrow_data); + if (garrow::check(error, arrow_scalar_result, "[scalar][parse]")) { + auto arrow_scalar = *arrow_scalar_result; + return garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL); + } else { + return NULL; + } +} + +/** + * garrow_scalar_get_data_type: + * @scalar: A #GArrowScalar. + * + * Returns: (transfer none): The #GArrowDataType for the scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDataType * +garrow_scalar_get_data_type(GArrowScalar *scalar) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(scalar); + if (!priv->data_type) { + priv->data_type = garrow_data_type_new_raw(&(priv->scalar->type)); + } + return priv->data_type; +} + +/** + * garrow_scalar_is_valid: + * @scalar: A #GArrowScalar. + * + * Returns: %TRUE if the scalar is valid, %FALSE otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_is_valid(GArrowScalar *scalar) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + return arrow_scalar->is_valid; +} + +/** + * garrow_scalar_equal: + * @scalar: A #GArrowScalar. + * @other_scalar: A #GArrowScalar to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_equal(GArrowScalar *scalar, + GArrowScalar *other_scalar) +{ + return garrow_scalar_equal_options(scalar, other_scalar, NULL); +} + +/** + * garrow_scalar_equal_options: + * @scalar: A #GArrowScalar. + * @other_scalar: A #GArrowScalar to be compared. + * @options: (nullable): A #GArrowEqualOptions. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 5.0.0 + */ +gboolean +garrow_scalar_equal_options(GArrowScalar *scalar, + GArrowScalar *other_scalar, + GArrowEqualOptions *options) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + const auto arrow_other_scalar = garrow_scalar_get_raw(other_scalar); + if (options) { + auto is_approx = garrow_equal_options_is_approx(options); + const auto arrow_options = garrow_equal_options_get_raw(options); + if (is_approx) { + return arrow_scalar->ApproxEquals(*arrow_other_scalar, *arrow_options); + } else { + return arrow_scalar->Equals(arrow_other_scalar, *arrow_options); + } + } else { + return arrow_scalar->Equals(arrow_other_scalar); + } +} + +/** + * garrow_scalar_to_string: + * @scalar: A #GArrowScalar. 
+ * + * Returns: The string representation of the scalar. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 5.0.0 + */ +gchar * +garrow_scalar_to_string(GArrowScalar *scalar) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + return g_strdup(arrow_scalar->ToString().c_str()); +} + +/** + * garrow_scalar_cast: + * @scalar: A #GArrowScalar. + * @data_type: A #GArrowDataType of the casted scalar. + * @options: (nullable): A #GArrowCastOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * A newly created casted scalar on success, %NULL on error. + * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_scalar_cast(GArrowScalar *scalar, + GArrowDataType *data_type, + GArrowCastOptions *options, + GError **error) +{ + const auto arrow_scalar = garrow_scalar_get_raw(scalar); + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_casted_scalar_result = arrow_scalar->CastTo(arrow_data_type); + if (garrow::check(error, arrow_casted_scalar_result, "[scalar][cast]")) { + auto arrow_casted_scalar = *arrow_casted_scalar_result; + return garrow_scalar_new_raw(&arrow_casted_scalar, + "scalar", &arrow_casted_scalar, + "data-type", data_type, + NULL); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowNullScalar, + garrow_null_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_null_scalar_init(GArrowNullScalar *object) +{ +} + +static void +garrow_null_scalar_class_init(GArrowNullScalarClass *klass) +{ +} + +/** + * garrow_null_scalar_new: + * + * Returns: A newly created #GArrowNullScalar. 
+ * + * Since: 5.0.0 + */ +GArrowNullScalar * +garrow_null_scalar_new(void) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::NullScalar>()); + return GARROW_NULL_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + + +G_DEFINE_TYPE(GArrowBooleanScalar, + garrow_boolean_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_boolean_scalar_init(GArrowBooleanScalar *object) +{ +} + +static void +garrow_boolean_scalar_class_init(GArrowBooleanScalarClass *klass) +{ +} + +/** + * garrow_boolean_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowBooleanScalar. + * + * Since: 5.0.0 + */ +GArrowBooleanScalar * +garrow_boolean_scalar_new(gboolean value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::BooleanScalar>(value)); + return GARROW_BOOLEAN_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_boolean_scalar_get_value: + * @scalar: A #GArrowBooleanScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gboolean +garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::BooleanScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt8Scalar, + garrow_int8_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int8_scalar_init(GArrowInt8Scalar *object) +{ +} + +static void +garrow_int8_scalar_class_init(GArrowInt8ScalarClass *klass) +{ +} + +/** + * garrow_int8_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt8Scalar. 
+ * + * Since: 5.0.0 + */ +GArrowInt8Scalar * +garrow_int8_scalar_new(gint8 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Int8Scalar>(value)); + return GARROW_INT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int8_scalar_get_value: + * @scalar: A #GArrowInt8Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint8 +garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Int8Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt16Scalar, + garrow_int16_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int16_scalar_init(GArrowInt16Scalar *object) +{ +} + +static void +garrow_int16_scalar_class_init(GArrowInt16ScalarClass *klass) +{ +} + +/** + * garrow_int16_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt16Scalar. + * + * Since: 5.0.0 + */ +GArrowInt16Scalar * +garrow_int16_scalar_new(gint16 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Int16Scalar>(value)); + return GARROW_INT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int16_scalar_get_value: + * @scalar: A #GArrowInt16Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint16 +garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Int16Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt32Scalar, + garrow_int32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int32_scalar_init(GArrowInt32Scalar *object) +{ +} + +static void +garrow_int32_scalar_class_init(GArrowInt32ScalarClass *klass) +{ +} + +/** + * garrow_int32_scalar_new: + * @value: The value of this scalar. 
+ * + * Returns: A newly created #GArrowInt32Scalar. + * + * Since: 5.0.0 + */ +GArrowInt32Scalar * +garrow_int32_scalar_new(gint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Int32Scalar>(value)); + return GARROW_INT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int32_scalar_get_value: + * @scalar: A #GArrowInt32Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint32 +garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Int32Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowInt64Scalar, + garrow_int64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_int64_scalar_init(GArrowInt64Scalar *object) +{ +} + +static void +garrow_int64_scalar_class_init(GArrowInt64ScalarClass *klass) +{ +} + +/** + * garrow_int64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowInt64Scalar. + * + * Since: 5.0.0 + */ +GArrowInt64Scalar * +garrow_int64_scalar_new(gint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Int64Scalar>(value)); + return GARROW_INT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_int64_scalar_get_value: + * @scalar: A #GArrowInt64Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint64 +garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Int64Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt8Scalar, + garrow_uint8_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint8_scalar_init(GArrowUInt8Scalar *object) +{ +} + +static void +garrow_uint8_scalar_class_init(GArrowUInt8ScalarClass *klass) +{ +} + +/** + * garrow_uint8_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt8Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt8Scalar * +garrow_uint8_scalar_new(guint8 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::UInt8Scalar>(value)); + return GARROW_UINT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint8_scalar_get_value: + * @scalar: A #GArrowUInt8Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint8 +garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::UInt8Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt16Scalar, + garrow_uint16_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint16_scalar_init(GArrowUInt16Scalar *object) +{ +} + +static void +garrow_uint16_scalar_class_init(GArrowUInt16ScalarClass *klass) +{ +} + +/** + * garrow_uint16_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt16Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt16Scalar * +garrow_uint16_scalar_new(guint16 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::UInt16Scalar>(value)); + return GARROW_UINT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint16_scalar_get_value: + * @scalar: A #GArrowUInt16Scalar. 
+ * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint16 +garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::UInt16Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt32Scalar, + garrow_uint32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint32_scalar_init(GArrowUInt32Scalar *object) +{ +} + +static void +garrow_uint32_scalar_class_init(GArrowUInt32ScalarClass *klass) +{ +} + +/** + * garrow_uint32_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt32Scalar. + * + * Since: 5.0.0 + */ +GArrowUInt32Scalar * +garrow_uint32_scalar_new(guint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::UInt32Scalar>(value)); + return GARROW_UINT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint32_scalar_get_value: + * @scalar: A #GArrowUInt32Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint32 +garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::UInt32Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowUInt64Scalar, + garrow_uint64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_uint64_scalar_init(GArrowUInt64Scalar *object) +{ +} + +static void +garrow_uint64_scalar_class_init(GArrowUInt64ScalarClass *klass) +{ +} + +/** + * garrow_uint64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowUInt64Scalar. 
+ * + * Since: 5.0.0 + */ +GArrowUInt64Scalar * +garrow_uint64_scalar_new(guint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::UInt64Scalar>(value)); + return GARROW_UINT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_uint64_scalar_get_value: + * @scalar: A #GArrowUInt64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +guint64 +garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::UInt64Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowFloatScalar, + garrow_float_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_float_scalar_init(GArrowFloatScalar *object) +{ +} + +static void +garrow_float_scalar_class_init(GArrowFloatScalarClass *klass) +{ +} + +/** + * garrow_float_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowFloatScalar. + * + * Since: 5.0.0 + */ +GArrowFloatScalar * +garrow_float_scalar_new(gfloat value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::FloatScalar>(value)); + return GARROW_FLOAT_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_float_scalar_get_value: + * @scalar: A #GArrowFloatScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gfloat +garrow_float_scalar_get_value(GArrowFloatScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::FloatScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowDoubleScalar, + garrow_double_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_double_scalar_init(GArrowDoubleScalar *object) +{ +} + +static void +garrow_double_scalar_class_init(GArrowDoubleScalarClass *klass) +{ +} + +/** + * garrow_double_scalar_new: + * @value: The value of this scalar. 
+ * + * Returns: A newly created #GArrowDoubleScalar. + * + * Since: 5.0.0 + */ +GArrowDoubleScalar * +garrow_double_scalar_new(gdouble value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::DoubleScalar>(value)); + return GARROW_DOUBLE_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_double_scalar_get_value: + * @scalar: A #GArrowDoubleScalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gdouble +garrow_double_scalar_get_value(GArrowDoubleScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::DoubleScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +typedef struct GArrowBaseBinaryScalarPrivate_ { + GArrowBuffer *value; +} GArrowBaseBinaryScalarPrivate; + +enum { + PROP_VALUE = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseBinaryScalar, + garrow_base_binary_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowBaseBinaryScalarPrivate *>( \ + garrow_base_binary_scalar_get_instance_private( \ + GARROW_BASE_BINARY_SCALAR(obj))) + +static void +garrow_base_binary_scalar_dispose(GObject *object) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_base_binary_scalar_parent_class)->dispose(object); +} + +static void +garrow_base_binary_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_base_binary_scalar_init(GArrowBaseBinaryScalar *object) +{ +} + +static void 
+garrow_base_binary_scalar_class_init(GArrowBaseBinaryScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_base_binary_scalar_dispose; + gobject_class->set_property = garrow_base_binary_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowBaseBinaryScalar:value: + * + * The value of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS +template<typename ArrowBinaryScalarType> +GArrowScalar * +garrow_base_binary_scalar_new(GArrowBuffer *value) +{ + auto arrow_value = garrow_buffer_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<ArrowBinaryScalarType>(arrow_value)); + return garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "value", value, + NULL); +} +G_BEGIN_DECLS + +/** + * garrow_base_binary_scalar_get_value: + * @scalar: A #GArrowBaseBinaryScalar. + * + * Returns: (transfer none): The value of this scalar. + * + * Since: 5.0.0 + */ +GArrowBuffer * +garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar) +{ + auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + const auto arrow_scalar = + std::static_pointer_cast<arrow::BaseBinaryScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + priv->value = garrow_buffer_new_raw(&(arrow_scalar->value)); + } + return priv->value; +} + + +G_DEFINE_TYPE(GArrowBinaryScalar, + garrow_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_binary_scalar_init(GArrowBinaryScalar *object) +{ +} + +static void +garrow_binary_scalar_class_init(GArrowBinaryScalarClass *klass) +{ +} + +/** + * garrow_binary_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowBinaryScalar. 
+ * + * Since: 5.0.0 + */ +GArrowBinaryScalar * +garrow_binary_scalar_new(GArrowBuffer *value) +{ + return GARROW_BINARY_SCALAR( + garrow_base_binary_scalar_new<arrow::BinaryScalar>(value)); +} + + +G_DEFINE_TYPE(GArrowStringScalar, + garrow_string_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_string_scalar_init(GArrowStringScalar *object) +{ +} + +static void +garrow_string_scalar_class_init(GArrowStringScalarClass *klass) +{ +} + +/** + * garrow_string_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowStringScalar. + * + * Since: 5.0.0 + */ +GArrowStringScalar * +garrow_string_scalar_new(GArrowBuffer *value) +{ + return GARROW_STRING_SCALAR( + garrow_base_binary_scalar_new<arrow::StringScalar>(value)); +} + + +G_DEFINE_TYPE(GArrowLargeBinaryScalar, + garrow_large_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_large_binary_scalar_init(GArrowLargeBinaryScalar *object) +{ +} + +static void +garrow_large_binary_scalar_class_init(GArrowLargeBinaryScalarClass *klass) +{ +} + +/** + * garrow_large_binary_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeBinaryScalar. + * + * Since: 5.0.0 + */ +GArrowLargeBinaryScalar * +garrow_large_binary_scalar_new(GArrowBuffer *value) +{ + return GARROW_LARGE_BINARY_SCALAR( + garrow_base_binary_scalar_new<arrow::LargeBinaryScalar>(value)); +} + + +G_DEFINE_TYPE(GArrowLargeStringScalar, + garrow_large_string_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_large_string_scalar_init(GArrowLargeStringScalar *object) +{ +} + +static void +garrow_large_string_scalar_class_init(GArrowLargeStringScalarClass *klass) +{ +} + +/** + * garrow_large_string_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeStringScalar. 
+ * + * Since: 5.0.0 + */ +GArrowLargeStringScalar * +garrow_large_string_scalar_new(GArrowBuffer *value) +{ + return GARROW_LARGE_STRING_SCALAR( + garrow_base_binary_scalar_new<arrow::LargeStringScalar>(value)); +} + + +G_DEFINE_TYPE(GArrowFixedSizeBinaryScalar, + garrow_fixed_size_binary_scalar, + GARROW_TYPE_BASE_BINARY_SCALAR) + +static void +garrow_fixed_size_binary_scalar_init(GArrowFixedSizeBinaryScalar *object) +{ +} + +static void +garrow_fixed_size_binary_scalar_class_init( + GArrowFixedSizeBinaryScalarClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_scalar_new: + * @data_type: A #GArrowFixedSizeBinaryDataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowFixedSizeBinaryScalar. + * + * Since: 5.0.0 + */ +GArrowFixedSizeBinaryScalar * +garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, + GArrowBuffer *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_buffer_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::FixedSizeBinaryScalar>( + arrow_value, arrow_data_type)); + return GARROW_FIXED_SIZE_BINARY_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + + +G_DEFINE_TYPE(GArrowDate32Scalar, + garrow_date32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_date32_scalar_init(GArrowDate32Scalar *object) +{ +} + +static void +garrow_date32_scalar_class_init(GArrowDate32ScalarClass *klass) +{ +} + +/** + * garrow_date32_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDate32Scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDate32Scalar * +garrow_date32_scalar_new(gint32 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Date32Scalar>(value)); + return GARROW_DATE32_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_date32_scalar_get_value: + * @scalar: A #GArrowDate32Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint32 +garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Date32Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowDate64Scalar, + garrow_date64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_date64_scalar_init(GArrowDate64Scalar *object) +{ +} + +static void +garrow_date64_scalar_class_init(GArrowDate64ScalarClass *klass) +{ +} + +/** + * garrow_date64_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDate64Scalar. + * + * Since: 5.0.0 + */ +GArrowDate64Scalar * +garrow_date64_scalar_new(gint64 value) +{ + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Date64Scalar>(value)); + return GARROW_DATE64_SCALAR(garrow_scalar_new_raw(&arrow_scalar)); +} + +/** + * garrow_date64_scalar_get_value: + * @scalar: A #GArrowDate64Scalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint64 +garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Date64Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTime32Scalar, + garrow_time32_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_time32_scalar_init(GArrowTime32Scalar *object) +{ +} + +static void +garrow_time32_scalar_class_init(GArrowTime32ScalarClass *klass) +{ +} + +/** + * garrow_time32_scalar_new: + * @data_type: A #GArrowTime32DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTime32Scalar. + * + * Since: 5.0.0 + */ +GArrowTime32Scalar * +garrow_time32_scalar_new(GArrowTime32DataType *data_type, + gint32 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Time32Scalar>(value, arrow_data_type)); + return GARROW_TIME32_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_time32_scalar_get_value: + * @scalar: A #GArrowTime32Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint32 +garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Time32Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTime64Scalar, + garrow_time64_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_time64_scalar_init(GArrowTime64Scalar *object) +{ +} + +static void +garrow_time64_scalar_class_init(GArrowTime64ScalarClass *klass) +{ +} + +/** + * garrow_time64_scalar_new: + * @data_type: A #GArrowTime64DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTime64Scalar. 
+ * + * Since: 5.0.0 + */ +GArrowTime64Scalar * +garrow_time64_scalar_new(GArrowTime64DataType *data_type, + gint64 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Time64Scalar>(value, arrow_data_type)); + return GARROW_TIME64_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_time64_scalar_get_value: + * @scalar: A #GArrowTime64Scalar. + * + * Returns: The value of this scalar. + * + * Since: 5.0.0 + */ +gint64 +garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::Time64Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +G_DEFINE_TYPE(GArrowTimestampScalar, + garrow_timestamp_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_timestamp_scalar_init(GArrowTimestampScalar *object) +{ +} + +static void +garrow_timestamp_scalar_class_init(GArrowTimestampScalarClass *klass) +{ +} + +/** + * garrow_timestamp_scalar_new: + * @data_type: A #GArrowTimestampDataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowTimestampScalar. + * + * Since: 5.0.0 + */ +GArrowTimestampScalar * +garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type, + gint64 value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::TimestampScalar>(value, arrow_data_type)); + return GARROW_TIMESTAMP_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); +} + +/** + * garrow_timestamp_scalar_get_value: + * @scalar: A #GArrowTimestampScalar. + * + * Returns: The value of this scalar. 
+ * + * Since: 5.0.0 + */ +gint64 +garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar) +{ + const auto arrow_scalar = + std::static_pointer_cast<arrow::TimestampScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->value; +} + + +typedef struct GArrowDecimal128ScalarPrivate_ { + GArrowDecimal128 *value; +} GArrowDecimal128ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal128Scalar, + garrow_decimal128_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL128_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowDecimal128ScalarPrivate *>( \ + garrow_decimal128_scalar_get_instance_private( \ + GARROW_DECIMAL128_SCALAR(obj))) + +static void +garrow_decimal128_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal128_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal128_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL128(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal128_scalar_init(GArrowDecimal128Scalar *object) +{ +} + +static void +garrow_decimal128_scalar_class_init(GArrowDecimal128ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal128_scalar_dispose; + gobject_class->set_property = garrow_decimal128_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal128Scalar:value: + * + * The value of the scalar. 
+ * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + garrow_decimal128_get_type(), + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal128_scalar_new: + * @data_type: A #GArrowDecimal128DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal128Scalar. + * + * Since: 5.0.0 + */ +GArrowDecimal128Scalar * +garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type, + GArrowDecimal128 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal128_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Decimal128Scalar>(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL128_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + +/** + * garrow_decimal128_scalar_get_value: + * @scalar: A #GArrowDecimal128Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDecimal128 * +garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar) +{ + auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = + std::static_pointer_cast<arrow::Decimal128Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared<arrow::Decimal128>(arrow_scalar->value); + priv->value = garrow_decimal128_new_raw(&arrow_value); + } + return priv->value; +} + + +typedef struct GArrowDecimal256ScalarPrivate_ { + GArrowDecimal256 *value; +} GArrowDecimal256ScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal256Scalar, + garrow_decimal256_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_DECIMAL256_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowDecimal256ScalarPrivate *>( \ + garrow_decimal256_scalar_get_instance_private( \ + GARROW_DECIMAL256_SCALAR(obj))) + +static void +garrow_decimal256_scalar_dispose(GObject *object) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_decimal256_scalar_parent_class)->dispose(object); +} + +static void +garrow_decimal256_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_DECIMAL256(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_decimal256_scalar_init(GArrowDecimal256Scalar *object) +{ +} + +static void +garrow_decimal256_scalar_class_init(GArrowDecimal256ScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_decimal256_scalar_dispose; + gobject_class->set_property = garrow_decimal256_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowDecimal256Scalar:value: + 
* + * The value of the scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + garrow_decimal256_get_type(), + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +/** + * garrow_decimal256_scalar_new: + * @data_type: A #GArrowDecimal256DataType for this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDecimal256Scalar. + * + * Since: 5.0.0 + */ +GArrowDecimal256Scalar * +garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type, + GArrowDecimal256 *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_decimal256_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::Decimal256Scalar>(*arrow_value, arrow_data_type)); + return GARROW_DECIMAL256_SCALAR(garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL)); +} + +/** + * garrow_decimal256_scalar_get_value: + * @scalar: A #GArrowDecimal256Scalar. + * + * Returns: (transfer none): The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GArrowDecimal256 * +garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar) +{ + auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { + auto arrow_scalar = + std::static_pointer_cast<arrow::Decimal256Scalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + auto arrow_value = std::make_shared<arrow::Decimal256>(arrow_scalar->value); + priv->value = garrow_decimal256_new_raw(&arrow_value); + } + return priv->value; +} + + +typedef struct GArrowBaseListScalarPrivate_ { + GArrowArray *value; +} GArrowBaseListScalarPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseListScalar, + garrow_base_list_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_BASE_LIST_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowBaseListScalarPrivate *>( \ + garrow_base_list_scalar_get_instance_private( \ + GARROW_BASE_LIST_SCALAR(obj))) + +static void +garrow_base_list_scalar_dispose(GObject *object) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_base_list_scalar_parent_class)->dispose(object); +} + +static void +garrow_base_list_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_base_list_scalar_init(GArrowBaseListScalar *object) +{ +} + +static void +garrow_base_list_scalar_class_init(GArrowBaseListScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_base_list_scalar_dispose; + gobject_class->set_property = garrow_base_list_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowBaseListScalar:value: + * + * The value of the 
scalar. + * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_ARRAY, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS /* step out of the extern-C block: a template cannot have C linkage */ +template<typename ArrowListScalarType> +GArrowScalar * +garrow_base_list_scalar_new(GArrowArray *value) +{ + auto arrow_value = garrow_array_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<ArrowListScalarType>(arrow_value)); + auto data_type = garrow_array_get_value_data_type(value); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + g_object_unref(data_type); /* release the local reference obtained from garrow_array_get_value_data_type() above */ + return scalar; +} +G_BEGIN_DECLS /* back to C linkage for the public functions below */ + +/** + * garrow_base_list_scalar_get_value: + * @scalar: A #GArrowBaseListScalar. + * + * Returns: (transfer none): The value of this scalar. + * + * Since: 5.0.0 + */ +GArrowArray * +garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar) +{ + auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(scalar); + if (!priv->value) { /* no cached wrapper yet: wrap the Arrow value once and keep it for later calls */ + const auto arrow_scalar = + std::static_pointer_cast<arrow::BaseListScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + priv->value = garrow_array_new_raw(&(arrow_scalar->value)); + } + return priv->value; +} + + +G_DEFINE_TYPE(GArrowListScalar, + garrow_list_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_list_scalar_init(GArrowListScalar *object) +{ +} + +static void +garrow_list_scalar_class_init(GArrowListScalarClass *klass) +{ +} + +/** + * garrow_list_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowListScalar. 
+ * + * Since: 5.0.0 + */ +GArrowListScalar * +garrow_list_scalar_new(GArrowListArray *value) +{ + return GARROW_LIST_SCALAR( + garrow_base_list_scalar_new<arrow::ListScalar>(GARROW_ARRAY(value))); +} + + +G_DEFINE_TYPE(GArrowLargeListScalar, + garrow_large_list_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_large_list_scalar_init(GArrowLargeListScalar *object) +{ +} + +static void +garrow_large_list_scalar_class_init(GArrowLargeListScalarClass *klass) +{ +} + +/** + * garrow_large_list_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowLargeListScalar. + * + * Since: 5.0.0 + */ +GArrowLargeListScalar * +garrow_large_list_scalar_new(GArrowLargeListArray *value) +{ + return GARROW_LARGE_LIST_SCALAR( + garrow_base_list_scalar_new<arrow::LargeListScalar>(GARROW_ARRAY(value))); +} + + +G_DEFINE_TYPE(GArrowMapScalar, + garrow_map_scalar, + GARROW_TYPE_BASE_LIST_SCALAR) + +static void +garrow_map_scalar_init(GArrowMapScalar *object) +{ +} + +static void +garrow_map_scalar_class_init(GArrowMapScalarClass *klass) +{ +} + +/** + * garrow_map_scalar_new: + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowMapScalar. 
+ * + * Since: 5.0.0 + */ +GArrowMapScalar * +garrow_map_scalar_new(GArrowStructArray *value) +{ + return GARROW_MAP_SCALAR( + garrow_base_list_scalar_new<arrow::MapScalar>(GARROW_ARRAY(value))); +} + + +typedef struct GArrowStructScalarPrivate_ { + GList *value; +} GArrowStructScalarPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowStructScalar, + garrow_struct_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_STRUCT_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowStructScalarPrivate *>( \ + garrow_struct_scalar_get_instance_private( \ + GARROW_STRUCT_SCALAR(obj))) + +static void +garrow_struct_scalar_dispose(GObject *object) +{ + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_list_free_full(priv->value, g_object_unref); + priv->value = NULL; + } + + G_OBJECT_CLASS(garrow_struct_scalar_parent_class)->dispose(object); +} + +static void +garrow_struct_scalar_init(GArrowStructScalar *object) +{ +} + +static void +garrow_struct_scalar_class_init(GArrowStructScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_struct_scalar_dispose; +} + +/** + * garrow_struct_scalar_new: + * @data_type: A #GArrowStructDataType for this scalar. + * @value: (element-type GArrowScalar): The value of this scalar. + * + * Returns: A newly created #GArrowStructScalar. 
+ * + * Since: 5.0.0 + */ +GArrowStructScalar * +garrow_struct_scalar_new(GArrowStructDataType *data_type, + GList *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + std::vector<std::shared_ptr<arrow::Scalar>> arrow_value; + for (GList *node = value; node; node = node->next) { /* gather the underlying arrow::Scalar of every field, in list order */ + auto field = GARROW_SCALAR(node->data); + auto arrow_field = garrow_scalar_get_raw(field); + arrow_value.push_back(arrow_field); + } + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<arrow::StructScalar>(arrow_value, arrow_data_type)); + auto scalar = + GARROW_STRUCT_SCALAR( + garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + NULL)); + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar); /* the field list is stored directly; no value property is passed to the constructor call above */ + priv->value = g_list_copy_deep(value, + reinterpret_cast<GCopyFunc>(g_object_ref), + NULL); /* private copy of the list that holds its own reference on every element */ + return scalar; +} + +/** + * garrow_struct_scalar_get_value: + * @scalar: A #GArrowStructScalar. + * + * Returns: (element-type GArrowScalar) (transfer none): + * The value of this scalar. 
+ * + * Since: 5.0.0 + */ +GList * +garrow_struct_scalar_get_value(GArrowStructScalar *scalar) +{ + auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar); + return priv->value; /* NOTE(review): only garrow_struct_scalar_new() is seen filling this list and there is no lazy wrap as in garrow_base_list_scalar_get_value(), so a scalar created any other way presumably yields NULL here - confirm */ +} + + +typedef struct GArrowUnionScalarPrivate_ { + GArrowScalar *value; +} GArrowUnionScalarPrivate; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowUnionScalar, + garrow_union_scalar, + GARROW_TYPE_SCALAR) + +#define GARROW_UNION_SCALAR_GET_PRIVATE(obj) \ + static_cast<GArrowUnionScalarPrivate *>( \ + garrow_union_scalar_get_instance_private( \ + GARROW_UNION_SCALAR(obj))) + +static void +garrow_union_scalar_dispose(GObject *object) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object); + + if (priv->value) { + g_object_unref(priv->value); + priv->value = NULL; /* GObject may run dispose more than once; clearing the pointer prevents a second unref */ + } + + G_OBJECT_CLASS(garrow_union_scalar_parent_class)->dispose(object); +} + +static void +garrow_union_scalar_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_VALUE: + priv->value = GARROW_SCALAR(g_value_dup_object(value)); /* dup takes our own reference; it is released in dispose */ + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_union_scalar_init(GArrowUnionScalar *object) +{ +} + +static void +garrow_union_scalar_class_init(GArrowUnionScalarClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_union_scalar_dispose; + gobject_class->set_property = garrow_union_scalar_set_property; + + GParamSpec *spec; + /** + * GArrowUnionScalar:value: + * + * The value of the scalar. 
+ * + * Since: 5.0.0 + */ + spec = g_param_spec_object("value", + "Value", + "The value of the scalar", + GARROW_TYPE_SCALAR, + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_VALUE, spec); +} + +G_END_DECLS +template<typename ArrowUnionScalarType> +GArrowScalar * +garrow_union_scalar_new(GArrowDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_value = garrow_scalar_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast<arrow::Scalar>( + std::make_shared<ArrowUnionScalarType>(arrow_value, type_code, + arrow_data_type)); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + return scalar; +} +G_BEGIN_DECLS + +/** + * garrow_union_scalar_get_type_code: + * @scalar: A #GArrowUnionScalar. + * + * Returns: The type code of this scalar. + * + * Since: 6.0.0 + */ +gint8 +garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar) +{ + const auto &arrow_scalar = + std::static_pointer_cast<arrow::UnionScalar>( + garrow_scalar_get_raw(GARROW_SCALAR(scalar))); + return arrow_scalar->type_code; +} + +/** + * garrow_union_scalar_get_value: + * @scalar: A #GArrowUnionScalar. + * + * Returns: (transfer none): The value of this scalar. + * + * Since: 5.0.0 + */ +GArrowScalar * +garrow_union_scalar_get_value(GArrowUnionScalar *scalar) +{ + auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(scalar); + return priv->value; +} + + +G_DEFINE_TYPE(GArrowSparseUnionScalar, + garrow_sparse_union_scalar, + GARROW_TYPE_UNION_SCALAR) + +static void +garrow_sparse_union_scalar_init(GArrowSparseUnionScalar *object) +{ +} + +static void +garrow_sparse_union_scalar_class_init(GArrowSparseUnionScalarClass *klass) +{ +} + +/** + * garrow_sparse_union_scalar_new: + * @data_type: A #GArrowSparseUnionDataType for this scalar. 
+ * @type_code: The type code of this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowSparseUnionScalar. + * + * Since: 5.0.0 + */ +GArrowSparseUnionScalar * +garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + return GARROW_SPARSE_UNION_SCALAR( + garrow_union_scalar_new<arrow::SparseUnionScalar>( + GARROW_DATA_TYPE(data_type), type_code, value)); +} + + +G_DEFINE_TYPE(GArrowDenseUnionScalar, + garrow_dense_union_scalar, + GARROW_TYPE_UNION_SCALAR) + +static void +garrow_dense_union_scalar_init(GArrowDenseUnionScalar *object) +{ +} + +static void +garrow_dense_union_scalar_class_init(GArrowDenseUnionScalarClass *klass) +{ +} + +/** + * garrow_dense_union_scalar_new: + * @data_type: A #GArrowDenseUnionDataType for this scalar. + * @type_code: The type code of this scalar. + * @value: The value of this scalar. + * + * Returns: A newly created #GArrowDenseUnionScalar. + * + * Since: 5.0.0 + */ +GArrowDenseUnionScalar * +garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value) +{ + return GARROW_DENSE_UNION_SCALAR( + garrow_union_scalar_new<arrow::DenseUnionScalar>( + GARROW_DATA_TYPE(data_type), type_code, value)); +} + + +G_DEFINE_TYPE(GArrowExtensionScalar, + garrow_extension_scalar, + GARROW_TYPE_SCALAR) + +static void +garrow_extension_scalar_init(GArrowExtensionScalar *object) +{ +} + +static void +garrow_extension_scalar_class_init(GArrowExtensionScalarClass *klass) +{ +} + + +G_END_DECLS + +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar) +{ + return garrow_scalar_new_raw(arrow_scalar, + "scalar", arrow_scalar, + NULL); +} + +GArrowScalar * +garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar, + const gchar *first_property_name, + ...) 
+{ + va_list args; + va_start(args, first_property_name); + auto array = garrow_scalar_new_raw_valist(arrow_scalar, + first_property_name, + args); + va_end(args); + return array; +} + +GArrowScalar * +garrow_scalar_new_raw_valist(std::shared_ptr<arrow::Scalar> *arrow_scalar, + const gchar *first_property_name, + va_list args) +{ + GType type; + GArrowScalar *scalar; + + switch ((*arrow_scalar)->type->id()) { + case arrow::Type::type::NA: + type = GARROW_TYPE_NULL_SCALAR; + break; + case arrow::Type::type::BOOL: + type = GARROW_TYPE_BOOLEAN_SCALAR; + break; + case arrow::Type::type::INT8: + type = GARROW_TYPE_INT8_SCALAR; + break; + case arrow::Type::type::INT16: + type = GARROW_TYPE_INT16_SCALAR; + break; + case arrow::Type::type::INT32: + type = GARROW_TYPE_INT32_SCALAR; + break; + case arrow::Type::type::INT64: + type = GARROW_TYPE_INT64_SCALAR; + break; + case arrow::Type::type::UINT8: + type = GARROW_TYPE_UINT8_SCALAR; + break; + case arrow::Type::type::UINT16: + type = GARROW_TYPE_UINT16_SCALAR; + break; + case arrow::Type::type::UINT32: + type = GARROW_TYPE_UINT32_SCALAR; + break; + case arrow::Type::type::UINT64: + type = GARROW_TYPE_UINT64_SCALAR; + break; + case arrow::Type::type::FLOAT: + type = GARROW_TYPE_FLOAT_SCALAR; + break; + case arrow::Type::type::DOUBLE: + type = GARROW_TYPE_DOUBLE_SCALAR; + break; + case arrow::Type::type::BINARY: + type = GARROW_TYPE_BINARY_SCALAR; + break; + case arrow::Type::type::STRING: + type = GARROW_TYPE_STRING_SCALAR; + break; + case arrow::Type::type::LARGE_BINARY: + type = GARROW_TYPE_LARGE_BINARY_SCALAR; + break; + case arrow::Type::type::LARGE_STRING: + type = GARROW_TYPE_LARGE_STRING_SCALAR; + break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR; + break; + case arrow::Type::type::DATE32: + type = GARROW_TYPE_DATE32_SCALAR; + break; + case arrow::Type::type::DATE64: + type = GARROW_TYPE_DATE64_SCALAR; + break; + case arrow::Type::type::TIME32: + type = 
GARROW_TYPE_TIME32_SCALAR; + break; + case arrow::Type::type::TIME64: + type = GARROW_TYPE_TIME64_SCALAR; + break; + case arrow::Type::type::TIMESTAMP: + type = GARROW_TYPE_TIMESTAMP_SCALAR; + break; + case arrow::Type::type::DECIMAL128: + type = GARROW_TYPE_DECIMAL128_SCALAR; + break; + case arrow::Type::type::DECIMAL256: + type = GARROW_TYPE_DECIMAL256_SCALAR; + break; + case arrow::Type::type::LIST: + type = GARROW_TYPE_LIST_SCALAR; + break; + case arrow::Type::type::LARGE_LIST: + type = GARROW_TYPE_LARGE_LIST_SCALAR; + break; +/* + case arrow::Type::type::FIXED_SIZE_LIST: + type = GARROW_TYPE_FIXED_SIZE_LIST_SCALAR; + break; +*/ + case arrow::Type::type::MAP: + type = GARROW_TYPE_MAP_SCALAR; + break; + case arrow::Type::type::STRUCT: + type = GARROW_TYPE_STRUCT_SCALAR; + break; + case arrow::Type::type::SPARSE_UNION: + type = GARROW_TYPE_SPARSE_UNION_SCALAR; + break; + case arrow::Type::type::DENSE_UNION: + type = GARROW_TYPE_DENSE_UNION_SCALAR; + break; + case arrow::Type::type::EXTENSION: + type = GARROW_TYPE_EXTENSION_SCALAR; + break; + default: + type = GARROW_TYPE_SCALAR; + break; + } + scalar = GARROW_SCALAR(g_object_new_valist(type, + first_property_name, + args)); + return scalar; +} + +std::shared_ptr<arrow::Scalar> +garrow_scalar_get_raw(GArrowScalar *scalar) +{ + auto priv = GARROW_SCALAR_GET_PRIVATE(scalar); + return priv->scalar; +} diff --git a/src/arrow/c_glib/arrow-glib/scalar.h b/src/arrow/c_glib/arrow-glib/scalar.h new file mode 100644 index 000000000..a110d1c5e --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/scalar.h @@ -0,0 +1,683 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.h> + +G_BEGIN_DECLS + +typedef struct _GArrowCastOptions GArrowCastOptions; + +#define GARROW_TYPE_SCALAR (garrow_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowScalar, + garrow_scalar, + GARROW, + SCALAR, + GObject) +struct _GArrowScalarClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_scalar_parse(GArrowDataType *data_type, + const guint8 *data, + gsize size, + GError **error); + +GARROW_AVAILABLE_IN_5_0 +GArrowDataType * +garrow_scalar_get_data_type(GArrowScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_is_valid(GArrowScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_equal(GArrowScalar *scalar, + GArrowScalar *other_scalar); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_scalar_equal_options(GArrowScalar *scalar, + GArrowScalar *other_scalar, + GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_5_0 +gchar * +garrow_scalar_to_string(GArrowScalar *scalar); + +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_scalar_cast(GArrowScalar *scalar, + GArrowDataType *data_type, + GArrowCastOptions *options, + GError **error); + + +#define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowNullScalar, + garrow_null_scalar, + GARROW, + NULL_SCALAR, + GArrowScalar) +struct _GArrowNullScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowNullScalar * +garrow_null_scalar_new(void); + + +#define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowBooleanScalar, + garrow_boolean_scalar, + GARROW, + BOOLEAN_SCALAR, + GArrowScalar) +struct _GArrowBooleanScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBooleanScalar * +garrow_boolean_scalar_new(gboolean value); +GARROW_AVAILABLE_IN_5_0 +gboolean +garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar); + + +#define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt8Scalar, + garrow_int8_scalar, + GARROW, + INT8_SCALAR, + GArrowScalar) +struct _GArrowInt8ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt8Scalar * +garrow_int8_scalar_new(gint8 value); +GARROW_AVAILABLE_IN_5_0 +gint8 +garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar); + + +#define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt16Scalar, + garrow_int16_scalar, + GARROW, + INT16_SCALAR, + GArrowScalar) +struct _GArrowInt16ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt16Scalar * +garrow_int16_scalar_new(gint16 value); +GARROW_AVAILABLE_IN_5_0 +gint16 +garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar); + + +#define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt32Scalar, + garrow_int32_scalar, + GARROW, + INT32_SCALAR, + GArrowScalar) +struct _GArrowInt32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt32Scalar * +garrow_int32_scalar_new(gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar); + + +#define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowInt64Scalar, + garrow_int64_scalar, + GARROW, + INT64_SCALAR, + GArrowScalar) +struct _GArrowInt64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowInt64Scalar * 
+garrow_int64_scalar_new(gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar); + + +#define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt8Scalar, + garrow_uint8_scalar, + GARROW, + UINT8_SCALAR, + GArrowScalar) +struct _GArrowUInt8ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt8Scalar * +garrow_uint8_scalar_new(guint8 value); +GARROW_AVAILABLE_IN_5_0 +guint8 +garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar); + + +#define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt16Scalar, + garrow_uint16_scalar, + GARROW, + UINT16_SCALAR, + GArrowScalar) +struct _GArrowUInt16ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt16Scalar * +garrow_uint16_scalar_new(guint16 value); +GARROW_AVAILABLE_IN_5_0 +guint16 +garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar); + + +#define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt32Scalar, + garrow_uint32_scalar, + GARROW, + UINT32_SCALAR, + GArrowScalar) +struct _GArrowUInt32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt32Scalar * +garrow_uint32_scalar_new(guint32 value); +GARROW_AVAILABLE_IN_5_0 +guint32 +garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar); + + +#define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUInt64Scalar, + garrow_uint64_scalar, + GARROW, + UINT64_SCALAR, + GArrowScalar) +struct _GArrowUInt64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowUInt64Scalar * +garrow_uint64_scalar_new(guint64 value); +GARROW_AVAILABLE_IN_5_0 +guint64 +garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar); + + +#define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GArrowFloatScalar, + garrow_float_scalar, + GARROW, + FLOAT_SCALAR, + GArrowScalar) +struct _GArrowFloatScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowFloatScalar * +garrow_float_scalar_new(gfloat value); +GARROW_AVAILABLE_IN_5_0 +gfloat +garrow_float_scalar_get_value(GArrowFloatScalar *scalar); + + +#define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDoubleScalar, + garrow_double_scalar, + GARROW, + DOUBLE_SCALAR, + GArrowScalar) +struct _GArrowDoubleScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDoubleScalar * +garrow_double_scalar_new(gdouble value); +GARROW_AVAILABLE_IN_5_0 +gdouble +garrow_double_scalar_get_value(GArrowDoubleScalar *scalar); + + +#define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar, + garrow_base_binary_scalar, + GARROW, + BASE_BINARY_SCALAR, + GArrowScalar) +struct _GArrowBaseBinaryScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBuffer * +garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar); + + +#define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBinaryScalar, + garrow_binary_scalar, + GARROW, + BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowBinaryScalar * +garrow_binary_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStringScalar, + garrow_string_scalar, + GARROW, + STRING_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowStringScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowStringScalar * +garrow_string_scalar_new(GArrowBuffer *value); + + +#define 
GARROW_TYPE_LARGE_BINARY_SCALAR (garrow_large_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar, + garrow_large_binary_scalar, + GARROW, + LARGE_BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowLargeBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeBinaryScalar * +garrow_large_binary_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar, + garrow_large_string_scalar, + GARROW, + LARGE_STRING_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowLargeStringScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeStringScalar * +garrow_large_string_scalar_new(GArrowBuffer *value); + + +#define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR \ + (garrow_fixed_size_binary_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar, + garrow_fixed_size_binary_scalar, + GARROW, + FIXED_SIZE_BINARY_SCALAR, + GArrowBaseBinaryScalar) +struct _GArrowFixedSizeBinaryScalarClass +{ + GArrowBaseBinaryScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowFixedSizeBinaryScalar * +garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, + GArrowBuffer *value); + + +#define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate32Scalar, + garrow_date32_scalar, + GARROW, + DATE32_SCALAR, + GArrowScalar) +struct _GArrowDate32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDate32Scalar * +garrow_date32_scalar_new(gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar); + + +#define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDate64Scalar, + garrow_date64_scalar, + GARROW, + DATE64_SCALAR, + GArrowScalar) +struct 
_GArrowDate64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDate64Scalar * +garrow_date64_scalar_new(gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar); + + +#define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime32Scalar, + garrow_time32_scalar, + GARROW, + TIME32_SCALAR, + GArrowScalar) +struct _GArrowTime32ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTime32Scalar * +garrow_time32_scalar_new(GArrowTime32DataType *data_type, + gint32 value); +GARROW_AVAILABLE_IN_5_0 +gint32 +garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar); + + +#define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTime64Scalar, + garrow_time64_scalar, + GARROW, + TIME64_SCALAR, + GArrowScalar) +struct _GArrowTime64ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTime64Scalar * +garrow_time64_scalar_new(GArrowTime64DataType *data_type, + gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar); + + +#define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTimestampScalar, + garrow_timestamp_scalar, + GARROW, + TIMESTAMP_SCALAR, + GArrowScalar) +struct _GArrowTimestampScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowTimestampScalar * +garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type, + gint64 value); +GARROW_AVAILABLE_IN_5_0 +gint64 +garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar); + + +#define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar, + garrow_decimal128_scalar, + GARROW, + DECIMAL128_SCALAR, + GArrowScalar) +struct _GArrowDecimal128ScalarClass +{ + GArrowScalarClass parent_class; 
+}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal128Scalar * +garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type, + GArrowDecimal128 *value); +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal128 * +garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar); + + +#define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar, + garrow_decimal256_scalar, + GARROW, + DECIMAL256_SCALAR, + GArrowScalar) +struct _GArrowDecimal256ScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal256Scalar * +garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type, + GArrowDecimal256 *value); +GARROW_AVAILABLE_IN_5_0 +GArrowDecimal256 * +garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar); + + +#define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowBaseListScalar, + garrow_base_list_scalar, + GARROW, + BASE_LIST_SCALAR, + GArrowScalar) +struct _GArrowBaseListScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowArray * +garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar); + +#define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowListScalar, + garrow_list_scalar, + GARROW, + LIST_SCALAR, + GArrowBaseListScalar) +struct _GArrowListScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowListScalar * +garrow_list_scalar_new(GArrowListArray *value); + + +#define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar, + garrow_large_list_scalar, + GARROW, + LARGE_LIST_SCALAR, + GArrowBaseListScalar) +struct _GArrowLargeListScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowLargeListScalar * +garrow_large_list_scalar_new(GArrowLargeListArray *value); + + +#define GARROW_TYPE_MAP_SCALAR 
(garrow_map_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowMapScalar, + garrow_map_scalar, + GARROW, + MAP_SCALAR, + GArrowBaseListScalar) +struct _GArrowMapScalarClass +{ + GArrowBaseListScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowMapScalar * +garrow_map_scalar_new(GArrowStructArray *value); + + +#define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowStructScalar, + garrow_struct_scalar, + GARROW, + STRUCT_SCALAR, + GArrowScalar) +struct _GArrowStructScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowStructScalar * +garrow_struct_scalar_new(GArrowStructDataType *data_type, + GList *value); +GARROW_AVAILABLE_IN_5_0 +GList * +garrow_struct_scalar_get_value(GArrowStructScalar *scalar); + + +#define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowUnionScalar, + garrow_union_scalar, + GARROW, + UNION_SCALAR, + GArrowScalar) +struct _GArrowUnionScalarClass +{ + GArrowScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +gint8 +garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar); +GARROW_AVAILABLE_IN_5_0 +GArrowScalar * +garrow_union_scalar_get_value(GArrowUnionScalar *scalar); + + +#define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar, + garrow_sparse_union_scalar, + GARROW, + SPARSE_UNION_SCALAR, + GArrowUnionScalar) +struct _GArrowSparseUnionScalarClass +{ + GArrowUnionScalarClass parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowSparseUnionScalar * +garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value); + + +#define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar, + garrow_dense_union_scalar, + GARROW, + DENSE_UNION_SCALAR, + GArrowUnionScalar) +struct _GArrowDenseUnionScalarClass +{ + GArrowUnionScalarClass 
parent_class; +}; + +GARROW_AVAILABLE_IN_5_0 +GArrowDenseUnionScalar * +garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, + gint8 type_code, + GArrowScalar *value); + + +#define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowExtensionScalar, + garrow_extension_scalar, + GARROW, + EXTENSION_SCALAR, + GArrowScalar) +struct _GArrowExtensionScalarClass +{ + GArrowScalarClass parent_class; +}; + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/scalar.hpp b/src/arrow/c_glib/arrow-glib/scalar.hpp new file mode 100644 index 000000000..46ac73e21 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/scalar.hpp @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/*
 * C++-only helpers that convert between GArrowScalar and arrow::Scalar.
 * The definitions live outside this header, so the notes below are read
 * off the signatures only -- NOTE(review): confirm against scalar.cpp.
 */

/* Wraps *arrow_scalar in a GArrowScalar. */
GArrowScalar *
garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar);
/* Variadic overload: additionally takes property name/value pairs,
 * starting at @first_property_name (presumably NULL-terminated like
 * g_object_new() -- TODO confirm). */
GArrowScalar *
garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar,
                      const gchar *first_property_name,
                      ...);
/* va_list counterpart of the variadic overload above. */
GArrowScalar *
garrow_scalar_new_raw_valist(std::shared_ptr<arrow::Scalar> *arrow_scalar,
                             const gchar *first_property_name,
                             va_list args);
/* Returns the arrow::Scalar associated with @scalar. */
std::shared_ptr<arrow::Scalar>
garrow_scalar_get_raw(GArrowScalar *scalar);
+ */ + +typedef struct GArrowSchemaPrivate_ { + std::shared_ptr<arrow::Schema> schema; +} GArrowSchemaPrivate; + +enum { + PROP_0, + PROP_SCHEMA +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowSchema, + garrow_schema, + G_TYPE_OBJECT) + +#define GARROW_SCHEMA_GET_PRIVATE(obj) \ + static_cast<GArrowSchemaPrivate *>( \ + garrow_schema_get_instance_private( \ + GARROW_SCHEMA(obj))) + +static void +garrow_schema_finalize(GObject *object) +{ + auto priv = GARROW_SCHEMA_GET_PRIVATE(object); + + priv->schema.~shared_ptr(); + + G_OBJECT_CLASS(garrow_schema_parent_class)->finalize(object); +} + +static void +garrow_schema_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_SCHEMA_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCHEMA: + priv->schema = + *static_cast<std::shared_ptr<arrow::Schema> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_schema_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_schema_init(GArrowSchema *object) +{ + auto priv = GARROW_SCHEMA_GET_PRIVATE(object); + new(&priv->schema) std::shared_ptr<arrow::Schema>; +} + +static void +garrow_schema_class_init(GArrowSchemaClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_schema_finalize; + gobject_class->set_property = garrow_schema_set_property; + gobject_class->get_property = garrow_schema_get_property; + + spec = g_param_spec_pointer("schema", + "Schema", + "The raw std::shared<arrow::Schema> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCHEMA, spec); +} + +/** + * garrow_schema_import: + * 
@c_abi_schema: (not nullable): A `struct ArrowSchema *`. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An imported #GArrowSchema on success, + * %NULL on error. + * + * You don't need to release the passed `struct ArrowSchema *`, + * even if this function reports an error. + * + * Since: 6.0.0 + */ +GArrowSchema * +garrow_schema_import(gpointer c_abi_schema, GError **error) +{ + auto arrow_schema_result = + arrow::ImportSchema(static_cast<ArrowSchema *>(c_abi_schema)); + if (garrow::check(error, arrow_schema_result, "[schema][import]")) { + return garrow_schema_new_raw(&(*arrow_schema_result)); + } else { + return NULL; + } +} + +/** + * garrow_schema_new: + * @fields: (element-type GArrowField): The fields of the schema. + * + * Returns: A newly created #GArrowSchema. + */ +GArrowSchema * +garrow_schema_new(GList *fields) +{ + std::vector<std::shared_ptr<arrow::Field>> arrow_fields; + for (GList *node = fields; node; node = node->next) { + GArrowField *field = GARROW_FIELD(node->data); + arrow_fields.push_back(garrow_field_get_raw(field)); + } + + auto arrow_schema = std::make_shared<arrow::Schema>(arrow_fields); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * garrow_schema_export: + * @schema: A #GArrowSchema. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): An exported #GArrowSchema as + * `struct ArrowStruct *` on success, %NULL on error. + * + * It should be freed with the `ArrowSchema::release` callback then + * g_free() when no longer needed. 
+ * + * Since: 6.0.0 + */ +gpointer +garrow_schema_export(GArrowSchema *schema, GError **error) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + auto c_abi_schema = g_new(ArrowSchema, 1); + auto status = arrow::ExportSchema(*arrow_schema, c_abi_schema); + if (garrow::check(error, status, "[schema][export]")) { + return c_abi_schema; + } else { + g_free(c_abi_schema); + return NULL; + } +} + +/** + * garrow_schema_equal: + * @schema: A #GArrowSchema. + * @other_schema: A #GArrowSchema to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_schema_equal(GArrowSchema *schema, GArrowSchema *other_schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + const auto arrow_other_schema = garrow_schema_get_raw(other_schema); + return arrow_schema->Equals(*arrow_other_schema); +} + +/** + * garrow_schema_get_field: + * @schema: A #GArrowSchema. + * @i: The index of the target field. + * + * Returns: (transfer full): The i-th field of the schema. + */ +GArrowField * +garrow_schema_get_field(GArrowSchema *schema, guint i) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_field = arrow_schema->field(i); + return garrow_field_new_raw(&arrow_field, nullptr); +} + +/** + * garrow_schema_get_field_by_name: + * @schema: A #GArrowSchema. + * @name: The name of the field to be found. + * + * Returns: (transfer full): The found field or %NULL. + */ +GArrowField * +garrow_schema_get_field_by_name(GArrowSchema *schema, + const gchar *name) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_field = arrow_schema->GetFieldByName(std::string(name)); + if (arrow_field == nullptr) { + return NULL; + } else { + auto arrow_data_type = arrow_field->type(); + return garrow_field_new_raw(&arrow_field, nullptr); + } +} + +/** + * garrow_schema_get_field_index: + * @schema: A #GArrowSchema. + * @name: The name of the field to be found. 
+ * + * Returns: The index of the found field, -1 on not found. + * + * Since: 0.15.0 + */ +gint +garrow_schema_get_field_index(GArrowSchema *schema, + const gchar *name) +{ + const auto &arrow_schema = garrow_schema_get_raw(schema); + return arrow_schema->GetFieldIndex(std::string(name)); +} + +/** + * garrow_schema_n_fields: + * @schema: A #GArrowSchema. + * + * Returns: The number of fields of the schema. + */ +guint +garrow_schema_n_fields(GArrowSchema *schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + return arrow_schema->num_fields(); +} + +/** + * garrow_schema_get_fields: + * @schema: A #GArrowSchema. + * + * Returns: (element-type GArrowField) (transfer full): + * The fields of the schema. + */ +GList * +garrow_schema_get_fields(GArrowSchema *schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + + GList *fields = NULL; + for (auto arrow_field : arrow_schema->fields()) { + auto field = garrow_field_new_raw(&arrow_field, nullptr); + fields = g_list_prepend(fields, field); + } + + return g_list_reverse(fields); +} + +/** + * garrow_schema_to_string: + * @schema: A #GArrowSchema. + * + * Returns: The string representation of the schema. + */ +gchar * +garrow_schema_to_string(GArrowSchema *schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + return g_strdup(arrow_schema->ToString().c_str()); +} + +/** + * garrow_schema_to_string_metadata: + * @schema: A #GArrowSchema. + * @show_metadata: Whether include metadata or not. + * + * Returns: The string representation of the schema. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.17.0 + */ +gchar * +garrow_schema_to_string_metadata(GArrowSchema *schema, gboolean show_metadata) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + return g_strdup(arrow_schema->ToString(show_metadata).c_str()); +} + +/** + * garrow_schema_add_field: + * @schema: A #GArrowSchema. + * @i: The index of the new field. 
+ * @field: The field to be added. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowSchema that has a new field or %NULL on error. + * + * Since: 0.10.0 + */ +GArrowSchema * +garrow_schema_add_field(GArrowSchema *schema, + guint i, + GArrowField *field, + GError **error) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + const auto arrow_field = garrow_field_get_raw(field); + auto maybe_new_schema = arrow_schema->AddField(i, arrow_field); + if (garrow::check(error, maybe_new_schema, "[schema][add-field]")) { + return garrow_schema_new_raw(&(*maybe_new_schema)); + } else { + return NULL; + } +} + +/** + * garrow_schema_remove_field: + * @schema: A #GArrowSchema. + * @i: The index of the field to be removed. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowSchema that doesn't have the field or %NULL on error. + * + * Since: 0.10.0 + */ +GArrowSchema * +garrow_schema_remove_field(GArrowSchema *schema, + guint i, + GError **error) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + auto maybe_new_schema = arrow_schema->RemoveField(i); + if (garrow::check(error, maybe_new_schema, "[schema][remove-field]")) { + return garrow_schema_new_raw(&(*maybe_new_schema)); + } else { + return NULL; + } +} + +/** + * garrow_schema_replace_field: + * @schema: A #GArrowSchema. + * @i: The index of the field to be replaced. + * @field: The newly added #GArrowField. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The newly allocated + * #GArrowSchema that has @field as the @i-th field or %NULL on error. 
+ * + * Since: 0.10.0 + */ +GArrowSchema * +garrow_schema_replace_field(GArrowSchema *schema, + guint i, + GArrowField *field, + GError **error) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + const auto arrow_field = garrow_field_get_raw(field); + auto maybe_new_schema = arrow_schema->SetField(i, arrow_field); + if (garrow::check(error, maybe_new_schema, "[schema][replace-field]")) { + return garrow_schema_new_raw(&(*maybe_new_schema)); + } else { + return NULL; + } +} + +/** + * garrow_schema_has_metadata: + * @schema: A #GArrowSchema. + * + * Returns: %TRUE if the schema has metadata, %FALSE otherwise. + * + * Since: 3.0.0 + */ +gboolean +garrow_schema_has_metadata(GArrowSchema *schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + return arrow_schema->HasMetadata(); +} + +/** + * garrow_schema_get_metadata: + * @schema: A #GArrowSchema. + * + * Returns: (element-type utf8 utf8) (nullable) (transfer full): The + * metadata in the schema. + * + * It should be freed with g_hash_table_unref() when no longer needed. + * + * Since: 0.17.0 + */ +GHashTable * +garrow_schema_get_metadata(GArrowSchema *schema) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + if (!arrow_schema->HasMetadata()) { + return NULL; + } + + auto arrow_metadata = arrow_schema->metadata(); + auto metadata = g_hash_table_new(g_str_hash, g_str_equal); + const auto n = arrow_metadata->size(); + for (int64_t i = 0; i < n; ++i) { + g_hash_table_insert(metadata, + const_cast<gchar *>(arrow_metadata->key(i).c_str()), + const_cast<gchar *>(arrow_metadata->value(i).c_str())); + } + return metadata; +} + +/** + * garrow_schema_with_metadata: + * @schema: A #GArrowSchema. + * @metadata: (element-type utf8 utf8): A new associated metadata. + * + * Returns: (transfer full): The new schema with the given metadata. 
+ * + * Since: 0.17.0 + */ +GArrowSchema * +garrow_schema_with_metadata(GArrowSchema *schema, + GHashTable *metadata) +{ + const auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata); + auto arrow_new_schema = arrow_schema->WithMetadata(arrow_metadata); + return garrow_schema_new_raw(&arrow_new_schema); +} + + +G_END_DECLS + +GArrowSchema * +garrow_schema_new_raw(std::shared_ptr<arrow::Schema> *arrow_schema) +{ + auto schema = GARROW_SCHEMA(g_object_new(GARROW_TYPE_SCHEMA, + "schema", arrow_schema, + NULL)); + return schema; +} + +std::shared_ptr<arrow::Schema> +garrow_schema_get_raw(GArrowSchema *schema) +{ + auto priv = GARROW_SCHEMA_GET_PRIVATE(schema); + return priv->schema; +} diff --git a/src/arrow/c_glib/arrow-glib/schema.h b/src/arrow/c_glib/arrow-glib/schema.h new file mode 100644 index 000000000..25519000b --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/schema.h @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/* GType accessor macro for GArrowSchema. */
#define GARROW_TYPE_SCHEMA (garrow_schema_get_type())
/* Derivable GObject type wrapping an Arrow schema; parent is GObject. */
G_DECLARE_DERIVABLE_TYPE(GArrowSchema,
                         garrow_schema,
                         GARROW,
                         SCHEMA,
                         GObject)
struct _GArrowSchemaClass
{
  GObjectClass parent_class;
};

/* Imports a schema from a C data interface `struct ArrowSchema *`;
 * returns NULL and sets @error on failure. */
GARROW_AVAILABLE_IN_6_0
GArrowSchema *
garrow_schema_import(gpointer c_abi_schema,
                     GError **error);

/* Creates a schema from a GList of GArrowField. */
GArrowSchema *garrow_schema_new (GList *fields);

/* Exports the schema as a newly allocated `struct ArrowSchema *`;
 * returns NULL and sets @error on failure. */
GARROW_AVAILABLE_IN_6_0
gpointer
garrow_schema_export(GArrowSchema *schema,
                     GError **error);

/* Comparison and field lookup by position or by name. */
gboolean garrow_schema_equal (GArrowSchema *schema,
                              GArrowSchema *other_schema);
GArrowField *garrow_schema_get_field (GArrowSchema *schema,
                                      guint i);
GArrowField *garrow_schema_get_field_by_name(GArrowSchema *schema,
                                             const gchar *name);
GARROW_AVAILABLE_IN_0_15
gint garrow_schema_get_field_index (GArrowSchema *schema,
                                    const gchar *name);

/* Field count and the full field list (a new GList of new wrappers). */
guint garrow_schema_n_fields (GArrowSchema *schema);
GList *garrow_schema_get_fields (GArrowSchema *schema);

/* String representations; the result is a g_strdup()-ed copy. */
gchar *garrow_schema_to_string(GArrowSchema *schema);
GARROW_AVAILABLE_IN_0_17
gchar *garrow_schema_to_string_metadata(GArrowSchema *schema,
                                        gboolean show_metadata);

/* Field edits: each returns a new GArrowSchema, or NULL with @error set;
 * @schema itself is not modified. */
GArrowSchema *garrow_schema_add_field (GArrowSchema *schema,
                                       guint i,
                                       GArrowField *field,
                                       GError **error);
GArrowSchema *garrow_schema_remove_field (GArrowSchema *schema,
                                          guint i,
                                          GError **error);
GArrowSchema *garrow_schema_replace_field (GArrowSchema *schema,
                                           guint i,
                                           GArrowField *field,
                                           GError **error);

/* Key/value metadata access; get_metadata returns NULL when the schema
 * has no metadata, with_metadata returns a new schema. */
GARROW_AVAILABLE_IN_3_0
gboolean
garrow_schema_has_metadata(GArrowSchema *schema);
GARROW_AVAILABLE_IN_0_17
GHashTable *
garrow_schema_get_metadata(GArrowSchema *schema);
GARROW_AVAILABLE_IN_0_17
GArrowSchema *
garrow_schema_with_metadata(GArrowSchema *schema,
                            GHashTable *metadata);
/* Creates a GArrowSchema for *arrow_schema.  The shared_ptr is copied
 * into the new object by the "schema" property setter. */
GArrowSchema *garrow_schema_new_raw(std::shared_ptr<arrow::Schema> *arrow_schema);
/* Returns a copy of the shared_ptr stored in @schema's private data. */
std::shared_ptr<arrow::Schema> garrow_schema_get_raw(GArrowSchema *schema);
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/array-builder.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table-builder.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: table-builder + * @section_id: table-builder-classes + * @title: Table builder classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowRecordBatchBuilder is a class to create + * new #GArrowRecordBatch. + */ + +typedef struct GArrowRecordBatchBuilderPrivate_ { + arrow::RecordBatchBuilder *record_batch_builder; + GPtrArray *column_builders; +} GArrowRecordBatchBuilderPrivate; + +enum { + PROP_0, + PROP_RECORD_BATCH_BUILDER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchBuilder, + garrow_record_batch_builder, + G_TYPE_OBJECT) + +#define GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object) \ + static_cast<GArrowRecordBatchBuilderPrivate *>( \ + garrow_record_batch_builder_get_instance_private( \ + GARROW_RECORD_BATCH_BUILDER(object))) + +static void +garrow_record_batch_builder_constructed(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + auto arrow_builder = priv->record_batch_builder; + auto n_columns = arrow_builder->num_fields(); + priv->column_builders = g_ptr_array_new_full(n_columns, g_object_unref); + for (int i = 0; i < n_columns; ++i) { + auto arrow_array_builder = arrow_builder->GetField(i); + auto array_builder = garrow_array_builder_new_raw(arrow_array_builder); + garrow_array_builder_release_ownership(array_builder); + g_ptr_array_add(priv->column_builders, 
array_builder); + } + + G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->constructed(object); +} + +static void +garrow_record_batch_builder_finalize(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + + g_ptr_array_free(priv->column_builders, TRUE); + delete priv->record_batch_builder; + + G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->finalize(object); +} + +static void +garrow_record_batch_builder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RECORD_BATCH_BUILDER: + priv->record_batch_builder = + static_cast<arrow::RecordBatchBuilder *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_builder_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_builder_init(GArrowRecordBatchBuilder *builder) +{ +} + +static void +garrow_record_batch_builder_class_init(GArrowRecordBatchBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->constructed = garrow_record_batch_builder_constructed; + gobject_class->finalize = garrow_record_batch_builder_finalize; + gobject_class->set_property = garrow_record_batch_builder_set_property; + gobject_class->get_property = garrow_record_batch_builder_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("record-batch-builder", + "RecordBatch builder", + "The raw arrow::RecordBatchBuilder *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_RECORD_BATCH_BUILDER, + spec); +} + +/** + * garrow_record_batch_builder_new: + * @schema: A 
#GArrowSchema. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowRecordBatchBuilder on success, + * %NULL on error. + * + * Since: 0.8.0 + */ +GArrowRecordBatchBuilder * +garrow_record_batch_builder_new(GArrowSchema *schema, GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto memory_pool = arrow::default_memory_pool(); + std::unique_ptr<arrow::RecordBatchBuilder> arrow_builder; + auto status = arrow::RecordBatchBuilder::Make(arrow_schema, + memory_pool, + &arrow_builder); + if (garrow_error_check(error, status, "[record-batch-builder][new]")) { + return garrow_record_batch_builder_new_raw(arrow_builder.release()); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_builder_get_initial_capacity: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The initial capacity for array builders. + * + * Since: 0.8.0 + */ +gint64 +garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + return arrow_builder->initial_capacity(); +} + +/** + * garrow_record_batch_builder_set_initial_capacity: + * @builder: A #GArrowRecordBatchBuilder. + * @capacity: The new initial capacity for array builders. + * + * Since: 0.8.0 + */ +void +garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder, + gint64 capacity) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + arrow_builder->SetInitialCapacity(capacity); +} + +/** + * garrow_record_batch_builder_get_schema: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: (transfer full): The #GArrowSchema of the record batch builder. 
+ * + * Since: 0.8.0 + */ +GArrowSchema * +garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + auto arrow_schema = arrow_builder->schema(); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * garrow_record_batch_builder_get_n_fields: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The number of fields. + * + * Since: 0.8.0 + * + * Deprecated: 0.13.0: + * Use garrow_record_batch_builder_get_n_columns() instead. + */ +gint +garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder) +{ + return garrow_record_batch_builder_get_n_columns(builder); +} + +/** + * garrow_record_batch_builder_get_n_columns: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The number of columns. + * + * Since: 0.13.0 + */ +gint +garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + return arrow_builder->num_fields(); +} + +/** + * garrow_record_batch_builder_get_field: + * @builder: A #GArrowRecordBatchBuilder. + * @i: The field index. If it's negative, index is counted backward + * from the end of the fields. `-1` means the last field. + * + * Returns: (transfer none) (nullable): The #GArrowArrayBuilder for + * the `i`-th field on success, %NULL on out of index. + * + * Since: 0.8.0 + * + * Deprecated: 0.13.0: + * Use garrow_record_batch_builder_get_column_builder() instead. + */ +GArrowArrayBuilder * +garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, + gint i) +{ + return garrow_record_batch_builder_get_column_builder(builder, i); +} + +/** + * garrow_record_batch_builder_get_column_builder: + * @builder: A #GArrowRecordBatchBuilder. + * @i: The column index. If it's negative, index is counted backward + * from the end of the columns. `-1` means the last column. 
+ * + * Returns: (transfer none) (nullable): The #GArrowArrayBuilder for + * the `i`-th column on success, %NULL on out of index. + * + * Since: 0.13.0 + */ +GArrowArrayBuilder * +garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, + gint i) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(builder); + if (i < 0) { + i += priv->column_builders->len; + } + if (i < 0) { + return NULL; + } + if (static_cast<guint>(i) >= priv->column_builders->len) { + return NULL; + } + + return GARROW_ARRAY_BUILDER(g_ptr_array_index(priv->column_builders, i)); +} + +/** + * garrow_record_batch_builder_flush: + * @builder: A #GArrowRecordBatchBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The built #GArrowRecordBatch on success, + * %NULL on error. + * + * Since: 0.8.0 + */ +GArrowRecordBatch * +garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, + GError **error) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + std::shared_ptr<arrow::RecordBatch> arrow_record_batch; + auto status = arrow_builder->Flush(&arrow_record_batch); + if (garrow_error_check(error, status, "[record-batch-builder][flush]")) { + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowRecordBatchBuilder * +garrow_record_batch_builder_new_raw(arrow::RecordBatchBuilder *arrow_builder) +{ + auto builder = g_object_new(GARROW_TYPE_RECORD_BATCH_BUILDER, + "record-batch-builder", arrow_builder, + NULL); + return GARROW_RECORD_BATCH_BUILDER(builder); +} + +arrow::RecordBatchBuilder * +garrow_record_batch_builder_get_raw(GArrowRecordBatchBuilder *builder) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(builder); + return priv->record_batch_builder; +} diff --git a/src/arrow/c_glib/arrow-glib/table-builder.h b/src/arrow/c_glib/arrow-glib/table-builder.h new file mode 100644 index 000000000..a76793953 --- /dev/null +++ 
/* GType accessor macro for GArrowRecordBatchBuilder. */
#define GARROW_TYPE_RECORD_BATCH_BUILDER (garrow_record_batch_builder_get_type())
/* Derivable GObject type that builds GArrowRecordBatch objects; parent
 * is GObject. */
G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchBuilder,
                         garrow_record_batch_builder,
                         GARROW,
                         RECORD_BATCH_BUILDER,
                         GObject)
struct _GArrowRecordBatchBuilderClass
{
  GObjectClass parent_class;
};

/* Creates a builder for @schema; returns NULL and sets @error on
 * failure. */
GArrowRecordBatchBuilder *garrow_record_batch_builder_new(GArrowSchema *schema,
                                                          GError **error);

/* Initial capacity used for the array builders, and the builder's
 * schema. */
gint64 garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder);
void garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder,
                                                      gint64 capacity);
GArrowSchema *garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder);

/* Column count.  get_n_fields is the deprecated alias of get_n_columns. */
#ifndef GARROW_DISABLE_DEPRECATED
GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_n_columns)
gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder);
#endif
GARROW_AVAILABLE_IN_0_13
gint
garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder);
/* Per-column array builder; a negative @i counts from the end and an
 * out-of-range @i yields NULL.  get_field is the deprecated alias of
 * get_column_builder. */
#ifndef GARROW_DISABLE_DEPRECATED
GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_column_builder)
GArrowArrayBuilder *garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder,
                                                          gint i);
#endif
GARROW_AVAILABLE_IN_0_13
GArrowArrayBuilder *
garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder,
                                               gint i);

/* Flushes the builder into a GArrowRecordBatch; returns NULL and sets
 * @error on failure. */
GArrowRecordBatch *garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder,
                                                     GError **error);
/* Wraps @arrow_builder in a new GArrowRecordBatchBuilder.  The wrapper
 * stores the raw pointer and deletes it in its finalizer, so the caller
 * gives up ownership. */
GArrowRecordBatchBuilder *garrow_record_batch_builder_new_raw(arrow::RecordBatchBuilder *arrow_builder);
/* Returns the raw pointer stored in @builder; it remains owned by
 * @builder. */
arrow::RecordBatchBuilder *garrow_record_batch_builder_get_raw(GArrowRecordBatchBuilder *builder);
+ *
+ * #GArrowTable is a class for table. Table has zero or more
+ * #GArrowChunkedArrays and zero or more records.
+ *
+ * #GArrowFeatherWriteProperties is a class to customize how to write
+ * Feather data.
+ */
+
+/* Instance-private data: the wrapped arrow::ConcatenateTablesOptions. */
+typedef struct GArrowTableConcatenateOptionsPrivate_ {
+  arrow::ConcatenateTablesOptions options;
+} GArrowTableConcatenateOptionsPrivate;
+
+/* Property IDs of GArrowTableConcatenateOptions. */
+enum {
+  PROP_UNIFY_SCHEMAS = 1,
+  PROP_PROMOTE_NULLABILITY,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowTableConcatenateOptions,
+                           garrow_table_concatenate_options,
+                           G_TYPE_OBJECT)
+
+/* Typed accessor for the instance-private struct of @obj. */
+#define GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(obj)        \
+  static_cast<GArrowTableConcatenateOptionsPrivate *>(           \
+    garrow_table_concatenate_options_get_instance_private(       \
+      GARROW_TABLE_CONCATENATE_OPTIONS(obj)))
+
+/* GObject finalize: `options` is constructed with placement new in
+ * garrow_table_concatenate_options_init(), so its destructor is run
+ * explicitly here before chaining up to the parent class. */
+static void
+garrow_table_concatenate_options_finalize(GObject *object)
+{
+  auto priv = GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(object);
+  priv->options.~ConcatenateTablesOptions();
+  G_OBJECT_CLASS(garrow_table_concatenate_options_parent_class)->finalize(object);
+}
+
+/* GObject set_property: stores the boolean carried by @value into the
+ * matching field of the wrapped options; unknown IDs only emit the
+ * standard invalid-property warning. */
+static void
+garrow_table_concatenate_options_set_property(GObject *object,
+                                              guint prop_id,
+                                              const GValue *value,
+                                              GParamSpec *pspec)
+{
+  auto priv = GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_UNIFY_SCHEMAS:
+    priv->options.unify_schemas = g_value_get_boolean(value);
+    break;
+  case PROP_PROMOTE_NULLABILITY:
+    priv->options.field_merge_options.promote_nullability =
+      g_value_get_boolean(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject get_property: the mirror image of set_property; reads the
+ * requested boolean from the wrapped options into @value. */
+static void
+garrow_table_concatenate_options_get_property(GObject *object,
+                                              guint prop_id,
+                                              GValue *value,
+                                              GParamSpec *pspec)
+{
+  auto priv = GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_UNIFY_SCHEMAS:
+    g_value_set_boolean(value, priv->options.unify_schemas);
+    break;
+  case PROP_PROMOTE_NULLABILITY:
+    g_value_set_boolean(value,
+                        priv->options.field_merge_options.promote_nullability);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance init: default-constructs the C++ options object in place
+ * inside the instance-private memory. */
+static void
+garrow_table_concatenate_options_init(GArrowTableConcatenateOptions *object)
+{
+  auto priv = GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(object);
+  new(&(priv->options)) arrow::ConcatenateTablesOptions;
+}
+
+/* Class init: installs the GObject virtual functions and registers
+ * both boolean properties. The property defaults are read from
+ * arrow::ConcatenateTablesOptions::Defaults(). */
+static void
+garrow_table_concatenate_options_class_init(
+  GArrowTableConcatenateOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize = garrow_table_concatenate_options_finalize;
+  gobject_class->set_property = garrow_table_concatenate_options_set_property;
+  gobject_class->get_property = garrow_table_concatenate_options_get_property;
+
+  GParamSpec *spec;
+
+  auto default_options = arrow::ConcatenateTablesOptions::Defaults();
+
+  /**
+   * GArrowTableConcatenateOptions:unify-schemas:
+   *
+   * If true, the schemas of the tables will be first unified with
+   * fields of the same name being merged, according to
+   * #GArrowTableConcatenateOptions:promote-nullability, then each
+   * table will be promoted to the unified schema before being
+   * concatenated.
+   *
+   * Otherwise, all tables should have the same schema. Each column in
+   * the output table is the result of concatenating the corresponding
+   * columns in all input tables.
+   *
+   * Since: 6.0.0
+   */
+  spec = g_param_spec_boolean("unify-schemas",
+                              "Unify schemas",
+                              "Whether unifying schemas or not",
+                              default_options.unify_schemas,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_UNIFY_SCHEMAS, spec);
+
+  /**
+   * GArrowTableConcatenateOptions:promote-nullability:
+   *
+   * If true, a #GArrowField of #GArrowNullDataType can be unified
+   * with a #GArrowField of another type. The unified field will be of
+   * the other type and become nullable. Nullability will be promoted
+   * to the looser option (nullable if one is not nullable).
+   *
+   * Since: 6.0.0
+   */
+  spec = g_param_spec_boolean("promote-nullability",
+                              "Promote nullability",
+                              "Whether promoting nullability or not",
+                              default_options.field_merge_options.promote_nullability,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_PROMOTE_NULLABILITY, spec);
+}
+
+/**
+ * garrow_table_concatenate_options_new:
+ *
+ * Returns: A newly created #GArrowTableConcatenateOptions.
+ *
+ * Since: 6.0.0
+ */
+GArrowTableConcatenateOptions *
+garrow_table_concatenate_options_new(void)
+{
+  return GARROW_TABLE_CONCATENATE_OPTIONS(
+    g_object_new(GARROW_TYPE_TABLE_CONCATENATE_OPTIONS,
+                 NULL));
+}
+
+
+/* Instance-private data of GArrowTable: the wrapped Arrow C++ table. */
+typedef struct GArrowTablePrivate_ {
+  std::shared_ptr<arrow::Table> table;
+} GArrowTablePrivate;
+
+/* Property IDs of GArrowTable. */
+enum {
+  PROP_TABLE = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowTable,
+                           garrow_table,
+                           G_TYPE_OBJECT)
+
+/* Typed accessor for the instance-private struct of @obj. */
+#define GARROW_TABLE_GET_PRIVATE(obj)         \
+  static_cast<GArrowTablePrivate *>(          \
+    garrow_table_get_instance_private(        \
+      GARROW_TABLE(obj)))
+
+/* GObject finalize: explicitly destroys the shared_ptr that
+ * garrow_table_init() created with placement new, then chains up. */
+static void
+garrow_table_finalize(GObject *object)
+{
+  auto priv = GARROW_TABLE_GET_PRIVATE(object);
+
+  priv->table.~shared_ptr();
+
+  G_OBJECT_CLASS(garrow_table_parent_class)->finalize(object);
+}
+
+/* GObject set_property: for PROP_TABLE, @value holds a pointer to a
+ * std::shared_ptr<arrow::Table>; the shared_ptr it points to is copied
+ * into the private data. */
+static void
+garrow_table_set_property(GObject *object,
+                          guint prop_id,
+                          const GValue *value,
+                          GParamSpec *pspec)
+{
+  auto priv = GARROW_TABLE_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_TABLE:
+    priv->table =
+      *static_cast<std::shared_ptr<arrow::Table> *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject get_property: no property is handled here, so every ID
+ * ends in the invalid-property warning. */
+static void
+garrow_table_get_property(GObject *object,
+                          guint prop_id,
+                          GValue *value,
+                          GParamSpec *pspec)
+{
+  switch (prop_id) {
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance init: constructs an empty shared_ptr in place inside the
+ * instance-private memory. */
+static void
+garrow_table_init(GArrowTable *object)
+{
+  auto priv = GARROW_TABLE_GET_PRIVATE(object);
+  new(&priv->table) std::shared_ptr<arrow::Table>;
+}
+
+/* Class init: installs the GObject virtual functions and registers the
+ * write-only, construct-only "table" pointer property. */
+static void
+garrow_table_class_init(GArrowTableClass *klass)
+{
+  GObjectClass *gobject_class;
+  GParamSpec *spec;
+
+  gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize     = garrow_table_finalize;
+  gobject_class->set_property = garrow_table_set_property;
+  gobject_class->get_property = garrow_table_get_property;
+
+  spec = g_param_spec_pointer("table",
+                              "Table",
+                              "The raw std::shared_ptr<arrow::Table> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_TABLE, spec);
+}
+
+/**
+ * garrow_table_new_values: (skip)
+ * @schema: The schema of the table.
+ * @values: The values of the table. All values must be instance of
+ *   the same class. Available classes are #GArrowChunkedArray,
+ *   #GArrowArray and #GArrowRecordBatch.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowTable or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowTable *
+garrow_table_new_values(GArrowSchema *schema,
+                        GList *values,
+                        GError **error)
+{
+  const auto tag = "[table][new][values]";
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  std::vector<std::shared_ptr<arrow::ChunkedArray>> chunked_array_values;
+  std::vector<std::shared_ptr<arrow::Array>> array_values;
+  std::vector<std::shared_ptr<arrow::RecordBatch>> record_batch_values;
+
+  /* Sort every list element into the bucket matching its GObject type;
+   * an element of any other type aborts construction at once. */
+  for (GList *cursor = values; cursor; cursor = cursor->next) {
+    auto value = cursor->data;
+    if (GARROW_IS_CHUNKED_ARRAY(value)) {
+      chunked_array_values.push_back(
+        garrow_chunked_array_get_raw(GARROW_CHUNKED_ARRAY(value)));
+      continue;
+    }
+    if (GARROW_IS_ARRAY(value)) {
+      array_values.push_back(garrow_array_get_raw(GARROW_ARRAY(value)));
+      continue;
+    }
+    if (GARROW_IS_RECORD_BATCH(value)) {
+      record_batch_values.push_back(
+        garrow_record_batch_get_raw(GARROW_RECORD_BATCH(value)));
+      continue;
+    }
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_INVALID,
+                "%s: %s",
+                tag,
+                "value must be one of "
+                "GArrowChunkedArray, GArrowArray and GArrowRecordBatch");
+    return NULL;
+  }
+
+  /* Mixing kinds is an error: at most one bucket may be non-empty. */
+  const bool has_chunked_arrays = !chunked_array_values.empty();
+  const bool has_arrays = !array_values.empty();
+  const bool has_record_batches = !record_batch_values.empty();
+  const size_t n_kinds =
+    (has_chunked_arrays ? 1 : 0) +
+    (has_arrays ? 1 : 0) +
+    (has_record_batches ? 1 : 0);
+  if (n_kinds > 1) {
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_INVALID,
+                "%s: %s",
+                tag,
+                "all values must be the same objects of "
+                "GArrowChunkedArray, GArrowArray or GArrowRecordBatch");
+    return NULL;
+  }
+
+  if (has_chunked_arrays) {
+    auto arrow_table = arrow::Table::Make(arrow_schema,
+                                          std::move(chunked_array_values));
+    auto status = arrow_table->Validate();
+    if (!garrow_error_check(error, status, tag)) {
+      return NULL;
+    }
+    return garrow_table_new_raw(&arrow_table);
+  }
+
+  if (has_arrays) {
+    auto arrow_table = arrow::Table::Make(arrow_schema,
+                                          std::move(array_values));
+    auto status = arrow_table->Validate();
+    if (!garrow_error_check(error, status, tag)) {
+      return NULL;
+    }
+    return garrow_table_new_raw(&arrow_table);
+  }
+
+  /* Reached for record batches and also when @values is empty. */
+  auto maybe_table = arrow::Table::FromRecordBatches(
+    arrow_schema, std::move(record_batch_values));
+  if (!garrow::check(error, maybe_table, tag)) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*maybe_table));
+}
+
+/**
+ * garrow_table_new_chunked_arrays:
+ * @schema: The schema of the table.
+ * @chunked_arrays: (array length=n_chunked_arrays): The chunked arrays of
+ *   the table.
+ * @n_chunked_arrays: The number of chunked arrays.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Builds the table with arrow::Table::Make() and runs Validate() on
+ * the result before wrapping it.
+ *
+ * Returns: (nullable): A newly created #GArrowTable or %NULL on error.
+ *
+ * Since: 0.15.0
+ */
+GArrowTable *
+garrow_table_new_chunked_arrays(GArrowSchema *schema,
+                                GArrowChunkedArray **chunked_arrays,
+                                gsize n_chunked_arrays,
+                                GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Unwrap each GLib object into its underlying Arrow C++ column. */
+  std::vector<std::shared_ptr<arrow::ChunkedArray>> arrow_columns;
+  for (gsize index = 0; index < n_chunked_arrays; ++index) {
+    arrow_columns.push_back(
+      garrow_chunked_array_get_raw(chunked_arrays[index]));
+  }
+
+  auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns);
+  auto status = arrow_table->Validate();
+  if (!garrow_error_check(error, status, "[table][new][chunked-arrays]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&arrow_table);
+}
+
+/**
+ * garrow_table_new_arrays:
+ * @schema: The schema of the table.
+ * @arrays: (array length=n_arrays): The arrays of the table.
+ * @n_arrays: The number of arrays.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowTable or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowTable *
+garrow_table_new_arrays(GArrowSchema *schema,
+                        GArrowArray **arrays,
+                        gsize n_arrays,
+                        GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Unwrap each GLib array into its underlying Arrow C++ array. */
+  std::vector<std::shared_ptr<arrow::Array>> arrow_columns;
+  for (gsize index = 0; index < n_arrays; ++index) {
+    auto arrow_column = garrow_array_get_raw(arrays[index]);
+    arrow_columns.push_back(arrow_column);
+  }
+
+  /* The freshly made table is validated before it is handed out. */
+  auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns);
+  auto status = arrow_table->Validate();
+  if (!garrow_error_check(error, status, "[table][new][arrays]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&arrow_table);
+}
+
+/**
+ * garrow_table_new_record_batches:
+ * @schema: The schema of the table.
+ * @record_batches: (array length=n_record_batches): The record batches
+ *   that have data for the table.
+ * @n_record_batches: The number of record batches.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Builds the table with arrow::Table::FromRecordBatches().
+ *
+ * Returns: (nullable): A newly created #GArrowTable or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowTable *
+garrow_table_new_record_batches(GArrowSchema *schema,
+                                GArrowRecordBatch **record_batches,
+                                gsize n_record_batches,
+                                GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Unwrap each GLib record batch into its Arrow C++ counterpart. */
+  std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_batches;
+  for (gsize index = 0; index < n_record_batches; ++index) {
+    arrow_batches.push_back(
+      garrow_record_batch_get_raw(record_batches[index]));
+  }
+
+  auto maybe_table = arrow::Table::FromRecordBatches(arrow_schema,
+                                                     arrow_batches);
+  if (!garrow::check(error, maybe_table, "[table][new][record-batches]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*maybe_table));
+}
+
+/**
+ * garrow_table_equal:
+ * @table: A #GArrowTable.
+ * @other_table: A #GArrowTable to be compared.
+ *
+ * Returns: %TRUE if both of them have the same data, %FALSE
+ *   otherwise.
+ *
+ * Since: 0.4.0
+ */
+gboolean
+garrow_table_equal(GArrowTable *table, GArrowTable *other_table)
+{
+  const auto lhs = garrow_table_get_raw(table);
+  const auto rhs = garrow_table_get_raw(other_table);
+  return lhs->Equals(*rhs);
+}
+
+/**
+ * garrow_table_equal_metadata:
+ * @table: A #GArrowTable.
+ * @other_table: A #GArrowTable to be compared.
+ * @check_metadata: Whether to compare metadata.
+ *
+ * Same comparison as garrow_table_equal(), with @check_metadata
+ * forwarded to arrow::Table::Equals().
+ *
+ * Returns: %TRUE if both of them have the same data, %FALSE
+ *   otherwise.
+ *
+ * Since: 0.17.0
+ */
+gboolean
+garrow_table_equal_metadata(GArrowTable *table,
+                            GArrowTable *other_table,
+                            gboolean check_metadata)
+{
+  const auto lhs = garrow_table_get_raw(table);
+  const auto rhs = garrow_table_get_raw(other_table);
+  return lhs->Equals(*rhs, check_metadata);
+}
+
+/**
+ * garrow_table_get_schema:
+ * @table: A #GArrowTable.
+ *
+ * Returns: (transfer full): The schema of the table.
+ */
+GArrowSchema *
+garrow_table_get_schema(GArrowTable *table)
+{
+  /* A named local is needed: garrow_schema_new_raw() takes a pointer
+   * to the shared_ptr. */
+  auto arrow_schema = garrow_table_get_raw(table)->schema();
+  return garrow_schema_new_raw(&arrow_schema);
+}
+
+/**
+ * garrow_table_get_column_data:
+ * @table: A #GArrowTable.
+ * @i: The index of the target column. If it's negative, index is
+ *   counted backward from the end of the columns. `-1` means the last
+ *   column.
+ *
+ * %NULL is returned when garrow_internal_index_adjust() rejects @i
+ * for the table's column count.
+ *
+ * Returns: (nullable) (transfer full): The i-th column's data in the table.
+ *
+ * Since: 0.15.0
+ */
+GArrowChunkedArray *
+garrow_table_get_column_data(GArrowTable *table,
+                             gint i)
+{
+  const auto arrow_table = garrow_table_get_raw(table);
+  const bool index_is_valid =
+    garrow_internal_index_adjust(i, arrow_table->num_columns());
+  if (index_is_valid) {
+    auto arrow_column = arrow_table->column(i);
+    return garrow_chunked_array_new_raw(&arrow_column);
+  }
+  return NULL;
+}
+
+/**
+ * garrow_table_get_n_columns:
+ * @table: A #GArrowTable.
+ *
+ * Returns: The number of columns in the table.
+ */
+guint
+garrow_table_get_n_columns(GArrowTable *table)
+{
+  return garrow_table_get_raw(table)->num_columns();
+}
+
+/**
+ * garrow_table_get_n_rows:
+ * @table: A #GArrowTable.
+ *
+ * Returns: The number of rows in the table.
+ */
+guint64
+garrow_table_get_n_rows(GArrowTable *table)
+{
+  return garrow_table_get_raw(table)->num_rows();
+}
+
+/**
+ * garrow_table_add_column:
+ * @table: A #GArrowTable.
+ * @i: The index of the new column.
+ * @field: The field for the column to be added.
+ * @chunked_array: The column data to be added.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to arrow::Table::AddColumn(); @table itself is not
+ * modified.
+ *
+ * Returns: (nullable) (transfer full): The newly allocated
+ *   #GArrowTable that has a new column or %NULL on error.
+ *
+ * Since: 0.15.0
+ */
+GArrowTable *
+garrow_table_add_column(GArrowTable *table,
+                        guint i,
+                        GArrowField *field,
+                        GArrowChunkedArray *chunked_array,
+                        GError **error)
+{
+  const auto arrow_table = garrow_table_get_raw(table);
+  auto maybe_extended_table =
+    arrow_table->AddColumn(i,
+                           garrow_field_get_raw(field),
+                           garrow_chunked_array_get_raw(chunked_array));
+  if (!garrow::check(error, maybe_extended_table, "[table][add-column]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*maybe_extended_table));
+}
+
+/**
+ * garrow_table_remove_column:
+ * @table: A #GArrowTable.
+ * @i: The index of the column to be removed.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): The newly allocated
+ *   #GArrowTable that doesn't have the column or %NULL on error.
+ *
+ * Since: 0.3.0
+ */
+GArrowTable *
+garrow_table_remove_column(GArrowTable *table,
+                           guint i,
+                           GError **error)
+{
+  auto maybe_reduced_table = garrow_table_get_raw(table)->RemoveColumn(i);
+  if (!garrow::check(error, maybe_reduced_table, "[table][remove-column]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*maybe_reduced_table));
+}
+
+/**
+ * garrow_table_replace_column:
+ * @table: A #GArrowTable.
+ * @i: The index of the column to be replaced.
+ * @field: The field for the new column.
+ * @chunked_array: The newly added column data.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to arrow::Table::SetColumn(); @table itself is not
+ * modified.
+ *
+ * Returns: (nullable) (transfer full): The newly allocated
+ *   #GArrowTable that has @chunked_array as the @i-th column or
+ *   %NULL on error.
+ *
+ * Since: 0.15.0
+ */
+GArrowTable *
+garrow_table_replace_column(GArrowTable *table,
+                            guint i,
+                            GArrowField *field,
+                            GArrowChunkedArray *chunked_array,
+                            GError **error)
+{
+  const auto arrow_table = garrow_table_get_raw(table);
+  auto maybe_updated_table =
+    arrow_table->SetColumn(i,
+                           garrow_field_get_raw(field),
+                           garrow_chunked_array_get_raw(chunked_array));
+  if (!garrow::check(error, maybe_updated_table, "[table][replace-column]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*maybe_updated_table));
+}
+
+/**
+ * garrow_table_to_string:
+ * @table: A #GArrowTable.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * The current implementation never sets @error; it copies the
+ * result of arrow::Table::ToString().
+ *
+ * Returns: (nullable):
+ *   The formatted table content or %NULL on error.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 0.12.0
+ */
+gchar *
+garrow_table_to_string(GArrowTable *table, GError **error)
+{
+  const auto formatted = garrow_table_get_raw(table)->ToString();
+  /* g_strdup() hands the caller its own copy of the text. */
+  return g_strdup(formatted.c_str());
+}
+
+/**
+ * garrow_table_concatenate:
+ * @table: A #GArrowTable.
+ * @other_tables: (element-type GArrowTable): The tables to be concatenated. + * @options: (nullable): The options to customize concatenation. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The table concatenated vertically. + * + * Since: 0.14.0 + */ +GArrowTable * +garrow_table_concatenate(GArrowTable *table, + GList *other_tables, + GArrowTableConcatenateOptions *options, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + std::vector<std::shared_ptr<arrow::Table>> arrow_tables = { arrow_table }; + for (auto node = other_tables; node; node = g_list_next(node)) { + auto arrow_other_table = garrow_table_get_raw(GARROW_TABLE(node->data)); + arrow_tables.push_back(arrow_other_table); + } + auto arrow_options = arrow::ConcatenateTablesOptions::Defaults(); + if (options) { + auto options_priv = GARROW_TABLE_CONCATENATE_OPTIONS_GET_PRIVATE(options); + arrow_options = options_priv->options; + } + auto arrow_concatenated_table_result = + arrow::ConcatenateTables(arrow_tables, arrow_options); + if (garrow::check(error, + arrow_concatenated_table_result, + "[table][concatenate]")) { + auto arrow_concatenated_table = std::move(*arrow_concatenated_table_result); + return garrow_table_new_raw(&arrow_concatenated_table); + } else { + return NULL; + } +} + +/** + * garrow_table_slice: + * @table: A #GArrowTable. + * @offset: The offset of sub #GArrowTable. If the offset is negative, + * the offset is counted from the last. + * @length: The length of sub #GArrowTable. + * + * Returns: (transfer full): The sub #GArrowTable. It covers + * only from `offset` to `offset + length` range. The sub + * #GArrowTable shares values with the base + * #GArrowTable. 
+ *
+ * Since: 0.14.0
+ */
+GArrowTable *
+garrow_table_slice(GArrowTable *table,
+                   gint64 offset,
+                   gint64 length)
+{
+  const auto arrow_table = garrow_table_get_raw(table);
+  /* A negative offset is rebased once against the row count. The
+   * result is passed to Slice() as is, even if it is still negative. */
+  if (offset < 0) {
+    offset += arrow_table->num_rows();
+  }
+  auto arrow_sub_table = arrow_table->Slice(offset, length);
+  return garrow_table_new_raw(&arrow_sub_table);
+}
+
+/**
+ * garrow_table_combine_chunks:
+ * @table: A #GArrowTable.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Delegates to arrow::Table::CombineChunks(); @table itself is not
+ * modified.
+ *
+ * Returns: (nullable) (transfer full): The #GArrowTable with
+ *   chunks combined, or %NULL on error.
+ *
+ * Since: 0.16.0
+ */
+GArrowTable *
+garrow_table_combine_chunks(GArrowTable *table,
+                            GError **error)
+{
+  const auto arrow_table = garrow_table_get_raw(table);
+
+  auto maybe_new_table = arrow_table->CombineChunks();
+  if (garrow::check(error, maybe_new_table, "[table][combine-chunks]")) {
+    return garrow_table_new_raw(&(*maybe_new_table));
+  } else {
+    return NULL;
+  }
+}
+
+
+/* Instance-private data: the wrapped Feather write properties. */
+typedef struct GArrowFeatherWritePropertiesPrivate_ {
+  arrow::ipc::feather::WriteProperties properties;
+} GArrowFeatherWritePropertiesPrivate;
+
+/* Property IDs. Only PROP_COMPRESSION is installed by class_init
+ * below; the other IDs are reserved (see the TODO notes there). */
+enum {
+  PROP_VERSION = 1,
+  PROP_CHUNK_SIZE,
+  PROP_COMPRESSION,
+  PROP_COMPRESSION_LEVEL,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowFeatherWriteProperties,
+                           garrow_feather_write_properties,
+                           G_TYPE_OBJECT)
+
+/* Typed accessor for the instance-private struct of @obj. */
+#define GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(obj)        \
+  static_cast<GArrowFeatherWritePropertiesPrivate *>(           \
+    garrow_feather_write_properties_get_instance_private(       \
+      GARROW_FEATHER_WRITE_PROPERTIES(obj)))
+
+/* GObject finalize: `properties` is constructed with placement new in
+ * garrow_feather_write_properties_init(), so its destructor is run
+ * explicitly here before chaining up to the parent class. */
+static void
+garrow_feather_write_properties_finalize(GObject *object)
+{
+  auto priv = GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(object);
+
+  priv->properties.~WriteProperties();
+
+  G_OBJECT_CLASS(garrow_feather_write_properties_parent_class)->finalize(object);
+}
+
+/* GObject set_property: stores the enum carried by @value as the
+ * Arrow C++ compression type; other IDs only emit the standard
+ * invalid-property warning. */
+static void
+garrow_feather_write_properties_set_property(GObject *object,
+                                             guint prop_id,
+                                             const GValue *value,
+                                             GParamSpec *pspec)
+{
+  auto priv = GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_COMPRESSION:
+    priv->properties.compression =
+      static_cast<arrow::Compression::type>(g_value_get_enum(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject get_property: reports the stored compression type. */
+static void
+garrow_feather_write_properties_get_property(GObject *object,
+                                             guint prop_id,
+                                             GValue *value,
+                                             GParamSpec *pspec)
+{
+  auto priv = GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_COMPRESSION:
+    g_value_set_enum(value, priv->properties.compression);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance init: constructs the C++ struct in place, then overwrites
+ * it with WriteProperties::Defaults(). */
+static void
+garrow_feather_write_properties_init(GArrowFeatherWriteProperties *object)
+{
+  auto priv = GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(object);
+  new(&priv->properties) arrow::ipc::feather::WriteProperties;
+  priv->properties = arrow::ipc::feather::WriteProperties::Defaults();
+}
+
+/* Class init: installs the GObject virtual functions and registers
+ * the "compression" property, the only one exposed so far. */
+static void
+garrow_feather_write_properties_class_init(GArrowFeatherWritePropertiesClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize     = garrow_feather_write_properties_finalize;
+  gobject_class->set_property = garrow_feather_write_properties_set_property;
+  gobject_class->get_property = garrow_feather_write_properties_get_property;
+
+  auto properties = arrow::ipc::feather::WriteProperties::Defaults();
+  GParamSpec *spec;
+  // TODO: version
+  // TODO: chunk_size
+
+  /**
+   * GArrowFeatherWriteProperties:compression:
+   *
+   * Compression type to use. Only
+   * %GARROW_COMPRESSION_TYPE_UNCOMPRESSED,
+   * %GARROW_COMPRESSION_TYPE_LZ4 and %GARROW_COMPRESSION_TYPE_ZSTD
+   * are supported. The default compression is
+   * %GARROW_COMPRESSION_TYPE_LZ4 if Apache Arrow C++ is built with
+   * support for it, otherwise %GARROW_COMPRESSION_TYPE_UNCOMPRESSED.
+   * The property default registered here is the value reported by
+   * arrow::ipc::feather::WriteProperties::Defaults().
+   *
+   * Since: 0.17.0
+   */
+  spec = g_param_spec_enum("compression",
+                           "Compression",
+                           "The compression type to use",
+                           GARROW_TYPE_COMPRESSION_TYPE,
+                           properties.compression,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_COMPRESSION, spec);
+  // TODO: compression_level
+}
+
+/**
+ * garrow_feather_write_properties_new:
+ *
+ * Returns: A newly created #GArrowFeatherWriteProperties.
+ *
+ * Since: 0.17.0
+ */
+GArrowFeatherWriteProperties *
+garrow_feather_write_properties_new(void)
+{
+  auto properties = g_object_new(GARROW_TYPE_FEATHER_WRITE_PROPERTIES, NULL);
+  return GARROW_FEATHER_WRITE_PROPERTIES(properties);
+}
+
+/**
+ * garrow_table_write_as_feather:
+ * @table: A #GArrowTable.
+ * @sink: The output.
+ * @properties: (nullable): The properties for this write.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Writes the @table as Feather format data to the @sink.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ * + * Since: 0.17.0 + */ +gboolean +garrow_table_write_as_feather(GArrowTable *table, + GArrowOutputStream *sink, + GArrowFeatherWriteProperties *properties, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_sink = garrow_output_stream_get_raw(sink); + arrow::Status status; + if (properties) { + auto arrow_properties = garrow_feather_write_properties_get_raw(properties); + status = arrow::ipc::feather::WriteTable(*arrow_table, + arrow_sink.get(), + *arrow_properties); + } else { + status = arrow::ipc::feather::WriteTable(*arrow_table, arrow_sink.get()); + } + return garrow::check(error, status, "[feather-write-file]"); +} + +G_END_DECLS + +GArrowTable * +garrow_table_new_raw(std::shared_ptr<arrow::Table> *arrow_table) +{ + auto table = GARROW_TABLE(g_object_new(GARROW_TYPE_TABLE, + "table", arrow_table, + NULL)); + return table; +} + +std::shared_ptr<arrow::Table> +garrow_table_get_raw(GArrowTable *table) +{ + auto priv = GARROW_TABLE_GET_PRIVATE(table); + return priv->table; +} + +arrow::ipc::feather::WriteProperties * +garrow_feather_write_properties_get_raw(GArrowFeatherWriteProperties *properties) +{ + auto priv = GARROW_FEATHER_WRITE_PROPERTIES_GET_PRIVATE(properties); + return &(priv->properties); +} diff --git a/src/arrow/c_glib/arrow-glib/table.h b/src/arrow/c_glib/arrow-glib/table.h new file mode 100644 index 000000000..05a95e91a --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/table.h @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/chunked-array.h> +#include <arrow-glib/output-stream.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/schema.h> +#include <arrow-glib/version.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_TABLE_CONCATENATE_OPTIONS \ + (garrow_table_concatenate_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTableConcatenateOptions, + garrow_table_concatenate_options, + GARROW, + TABLE_CONCATENATE_OPTIONS, + GObject) +struct _GArrowTableConcatenateOptionsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_6_0 +GArrowTableConcatenateOptions * +garrow_table_concatenate_options_new(void); + + +#define GARROW_TYPE_TABLE (garrow_table_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTable, + garrow_table, + GARROW, + TABLE, + GObject) +struct _GArrowTableClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_values(GArrowSchema *schema, + GList *values, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowTable * +garrow_table_new_chunked_arrays(GArrowSchema *schema, + GArrowChunkedArray **chunked_arrays, + gsize n_chunked_arrays, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_arrays(GArrowSchema *schema, + GArrowArray **arrays, + gsize n_arrays, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_record_batches(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches, + GError **error); + +gboolean garrow_table_equal (GArrowTable *table, + GArrowTable *other_table); 
+GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_table_equal_metadata(GArrowTable *table, + GArrowTable *other_table, + gboolean check_metadata); + +GArrowSchema *garrow_table_get_schema (GArrowTable *table); +GARROW_AVAILABLE_IN_0_15 +GArrowChunkedArray * +garrow_table_get_column_data(GArrowTable *table, + gint i); + +guint garrow_table_get_n_columns (GArrowTable *table); +guint64 garrow_table_get_n_rows (GArrowTable *table); + +GARROW_AVAILABLE_IN_0_15 +GArrowTable *garrow_table_add_column (GArrowTable *table, + guint i, + GArrowField *field, + GArrowChunkedArray *chunked_array, + GError **error); +GArrowTable *garrow_table_remove_column (GArrowTable *table, + guint i, + GError **error); +GARROW_AVAILABLE_IN_0_15 +GArrowTable *garrow_table_replace_column(GArrowTable *table, + guint i, + GArrowField *field, + GArrowChunkedArray *chunked_array, + GError **error); +gchar *garrow_table_to_string (GArrowTable *table, + GError **error); +GARROW_AVAILABLE_IN_0_14 +GArrowTable * +garrow_table_concatenate(GArrowTable *table, + GList *other_tables, + GArrowTableConcatenateOptions *options, + GError **error); +GARROW_AVAILABLE_IN_0_14 +GArrowTable * +garrow_table_slice(GArrowTable *table, + gint64 offset, + gint64 length); +GARROW_AVAILABLE_IN_0_16 +GArrowTable * +garrow_table_combine_chunks(GArrowTable *table, + GError **error); + + +#define GARROW_TYPE_FEATHER_WRITE_PROPERTIES \ + (garrow_feather_write_properties_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFeatherWriteProperties, + garrow_feather_write_properties, + GARROW, + FEATHER_WRITE_PROPERTIES, + GObject) +struct _GArrowFeatherWritePropertiesClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GArrowFeatherWriteProperties * +garrow_feather_write_properties_new(void); + +GARROW_AVAILABLE_IN_0_17 +gboolean +garrow_table_write_as_feather(GArrowTable *table, + GArrowOutputStream *sink, + GArrowFeatherWriteProperties *properties, + GError **error); + +G_END_DECLS diff --git 
a/src/arrow/c_glib/arrow-glib/table.hpp b/src/arrow/c_glib/arrow-glib/table.hpp new file mode 100644 index 000000000..dc972d80c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/table.hpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> +#include <arrow/ipc/api.h> + +#include <arrow-glib/table.h> + +GArrowTable *garrow_table_new_raw(std::shared_ptr<arrow::Table> *arrow_table); +std::shared_ptr<arrow::Table> garrow_table_get_raw(GArrowTable *table); + +arrow::ipc::feather::WriteProperties * +garrow_feather_write_properties_get_raw(GArrowFeatherWriteProperties *properties); diff --git a/src/arrow/c_glib/arrow-glib/tensor.cpp b/src/arrow/c_glib/arrow-glib/tensor.cpp new file mode 100644 index 000000000..7e6dc80f5 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/tensor.cpp @@ -0,0 +1,464 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/tensor.hpp> +#include <arrow-glib/type.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: tensor + * @short_description: Tensor class + * @include: arrow-glib/arrow-glib.h + * + * #GArrowTensor is a tensor class. + * + * Since: 0.3.0 + */ + +typedef struct GArrowTensorPrivate_ { + std::shared_ptr<arrow::Tensor> tensor; + GArrowBuffer *buffer; +} GArrowTensorPrivate; + +enum { + PROP_0, + PROP_TENSOR, + PROP_BUFFER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowTensor, garrow_tensor, G_TYPE_OBJECT) + +#define GARROW_TENSOR_GET_PRIVATE(obj) \ + static_cast<GArrowTensorPrivate *>( \ + garrow_tensor_get_instance_private( \ + GARROW_TENSOR(obj))) + +static void +garrow_tensor_dispose(GObject *object) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(object); + + if (priv->buffer) { + g_object_unref(priv->buffer); + priv->buffer = nullptr; + } + + G_OBJECT_CLASS(garrow_tensor_parent_class)->dispose(object); +} + +static void +garrow_tensor_finalize(GObject *object) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(object); + + priv->tensor.~shared_ptr(); + + G_OBJECT_CLASS(garrow_tensor_parent_class)->finalize(object); +} + +static void +garrow_tensor_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(object); + + switch (prop_id) { + case 
PROP_TENSOR: + priv->tensor = + *static_cast<std::shared_ptr<arrow::Tensor> *>(g_value_get_pointer(value)); + break; + case PROP_BUFFER: + priv->buffer = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* GObject get_property hook: only "buffer" is readable; any other + * property id is reported through G_OBJECT_WARN_INVALID_PROPERTY_ID(). */ +static void +garrow_tensor_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + g_value_set_object(value, priv->buffer); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Instance init: constructs the std::shared_ptr member in place with + * placement new; garrow_tensor_finalize() runs the matching explicit + * destructor call. */ +static void +garrow_tensor_init(GArrowTensor *object) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(object); + new(&priv->tensor) std::shared_ptr<arrow::Tensor>; +} + +/* Class init: wires the dispose/finalize/property hooks and installs + * the construct-only "tensor" (write-only raw pointer) and "buffer" + * (read-write #GArrowBuffer) properties. */ +static void +garrow_tensor_class_init(GArrowTensorClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_tensor_dispose; + gobject_class->finalize = garrow_tensor_finalize; + gobject_class->set_property = garrow_tensor_set_property; + gobject_class->get_property = garrow_tensor_get_property; + + /* The value is a pointer to a std::shared_ptr; set_property copies + * the pointed-to shared_ptr into the private struct. */ + spec = g_param_spec_pointer("tensor", + "Tensor", + "The raw std::shared_ptr<arrow::Tensor> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_TENSOR, spec); + + spec = g_param_spec_object("buffer", + "Buffer", + "The data", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER, spec); +} + +/** + * garrow_tensor_new: + * @data_type: A #GArrowDataType that indicates each element type + * in the tensor. + * @data: A #GArrowBuffer that contains tensor data. + * @shape: (array length=n_dimensions): A list of dimension sizes. + * @n_dimensions: The number of dimensions.
+ * @strides: (array length=n_strides) (nullable): A list of the number of + * bytes in each dimension. + * @n_strides: The number of strides. + * @dimension_names: (array length=n_dimension_names) (nullable): A list of + * dimension names. + * @n_dimension_names: The number of dimension names + * + * Returns: The newly created #GArrowTensor. + * + * Since: 0.3.0 + */ +GArrowTensor * +garrow_tensor_new(GArrowDataType *data_type, + GArrowBuffer *data, + gint64 *shape, + gsize n_dimensions, + gint64 *strides, + gsize n_strides, + gchar **dimension_names, + gsize n_dimension_names) +{ + auto arrow_data_type = garrow_data_type_get_raw(data_type); + auto arrow_data = garrow_buffer_get_raw(data); + // Copy each C array into its C++ counterpart via the iterator-range + // constructor; a zero count yields an empty vector. + std::vector<int64_t> arrow_shape(shape, shape + n_dimensions); + std::vector<int64_t> arrow_strides(strides, strides + n_strides); + std::vector<std::string> arrow_dimension_names(dimension_names, + dimension_names + n_dimension_names); + auto arrow_tensor = + std::make_shared<arrow::Tensor>(arrow_data_type, + arrow_data, + arrow_shape, + arrow_strides, + arrow_dimension_names); + // Hand @data to the wrapper as well so it is stored as the "buffer" property. + return garrow_tensor_new_raw_buffer(&arrow_tensor, data); +} + +/** + * garrow_tensor_equal: + * @tensor: A #GArrowTensor. + * @other_tensor: A #GArrowTensor to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE + * otherwise. + * + * Since: 0.4.0 + */ +gboolean +garrow_tensor_equal(GArrowTensor *tensor, GArrowTensor *other_tensor) +{ + const auto lhs = garrow_tensor_get_raw(tensor); + const auto rhs = garrow_tensor_get_raw(other_tensor); + return lhs->Equals(*rhs); +} + +/** + * garrow_tensor_get_value_data_type: + * @tensor: A #GArrowTensor. + * + * Returns: (transfer full): The data type of each value in the tensor.
+ * + * Since: 0.3.0 + */ +GArrowDataType * +garrow_tensor_get_value_data_type(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + auto arrow_data_type = arrow_tensor->type(); + return garrow_data_type_new_raw(&arrow_data_type); +} + +/** + * garrow_tensor_get_value_type: + * @tensor: A #GArrowTensor. + * + * Returns: The type of each value in the tensor. + * + * Since: 0.3.0 + */ +GArrowType +garrow_tensor_get_value_type(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + auto arrow_type = arrow_tensor->type_id(); + return garrow_type_from_raw(arrow_type); +} + +/** + * garrow_tensor_get_buffer: + * @tensor: A #GArrowTensor. + * + * Returns: (transfer full): The data of the tensor. + * + * Since: 0.3.0 + */ +GArrowBuffer * +garrow_tensor_get_buffer(GArrowTensor *tensor) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(tensor); + if (priv->buffer) { + g_object_ref(priv->buffer); + return priv->buffer; + } + + auto arrow_tensor = garrow_tensor_get_raw(tensor); + auto arrow_buffer = arrow_tensor->data(); + return garrow_buffer_new_raw(&arrow_buffer); +} + +/** + * garrow_tensor_get_shape: + * @tensor: A #GArrowTensor. + * @n_dimensions: (out): The number of dimensions. + * + * Returns: (array length=n_dimensions) (transfer full): + * The shape of the tensor. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.3.0 + */ +gint64 * +garrow_tensor_get_shape(GArrowTensor *tensor, gint *n_dimensions) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + // Bind by reference: the vector is only read here, and arrow_tensor + // keeps the tensor alive for the whole function. + const auto &arrow_shape = arrow_tensor->shape(); + auto n_dimensions_raw = arrow_shape.size(); + // g_new(type, count) passes the element count and element size to + // g_malloc_n() in the documented order. + auto shape = g_new(gint64, n_dimensions_raw); + for (gsize i = 0; i < n_dimensions_raw; ++i) { + shape[i] = arrow_shape[i]; + } + *n_dimensions = static_cast<gint>(n_dimensions_raw); + return shape; +} + +/** + * garrow_tensor_get_strides: + * @tensor: A #GArrowTensor. + * @n_strides: (out): The number of strides.
+ * + * Returns: (array length=n_strides) (transfer full): + * The strides of the tensor. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.3.0 + */ +gint64 * +garrow_tensor_get_strides(GArrowTensor *tensor, gint *n_strides) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + // Bind by reference: the vector is only read here, and arrow_tensor + // keeps the tensor alive for the whole function. + const auto &arrow_strides = arrow_tensor->strides(); + auto n_strides_raw = arrow_strides.size(); + // g_new(type, count) passes the element count and element size to + // g_malloc_n() in the documented order. + auto strides = g_new(gint64, n_strides_raw); + for (gsize i = 0; i < n_strides_raw; ++i) { + strides[i] = arrow_strides[i]; + } + *n_strides = static_cast<gint>(n_strides_raw); + return strides; +} + +/** + * garrow_tensor_get_n_dimensions: + * @tensor: A #GArrowTensor. + * + * Returns: The number of dimensions of the tensor. + * + * Since: 0.3.0 + */ +gint +garrow_tensor_get_n_dimensions(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->ndim(); +} + +/** + * garrow_tensor_get_dimension_name: + * @tensor: A #GArrowTensor. + * @i: The index of the target dimension. + * + * Returns: The i-th dimension name of the tensor. + * + * Since: 0.3.0 + */ +const gchar * +garrow_tensor_get_dimension_name(GArrowTensor *tensor, gint i) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + auto arrow_dimension_name = &(arrow_tensor->dim_name(i)); + return arrow_dimension_name->c_str(); +} + +/** + * garrow_tensor_get_size: + * @tensor: A #GArrowTensor. + * + * Returns: The number of value cells in the tensor. + * + * Since: 0.3.0 + */ +gint64 +garrow_tensor_get_size(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->size(); +} + +/** + * garrow_tensor_is_mutable: + * @tensor: A #GArrowTensor. + * + * Returns: %TRUE if the tensor is mutable, %FALSE otherwise.
+ * + * Since: 0.3.0 + */ +gboolean +garrow_tensor_is_mutable(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->is_mutable(); +} + +/** + * garrow_tensor_is_contiguous: + * @tensor: A #GArrowTensor. + * + * Returns: %TRUE if the tensor is contiguous, %FALSE otherwise. + * + * Since: 0.3.0 + */ +gboolean +garrow_tensor_is_contiguous(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->is_contiguous(); +} + +/** + * garrow_tensor_is_row_major: + * @tensor: A #GArrowTensor. + * + * Returns: %TRUE if the tensor is row major a.k.a. C order, + * %FALSE otherwise. + * + * Since: 0.3.0 + */ +gboolean +garrow_tensor_is_row_major(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->is_row_major(); +} + +/** + * garrow_tensor_is_column_major: + * @tensor: A #GArrowTensor. + * + * Returns: %TRUE if the tensor is column major a.k.a. Fortran order, + * %FALSE otherwise. 
+ * + * Since: 0.3.0 + */ +gboolean +garrow_tensor_is_column_major(GArrowTensor *tensor) +{ + auto arrow_tensor = garrow_tensor_get_raw(tensor); + return arrow_tensor->is_column_major(); +} + +G_END_DECLS + +GArrowTensor * +garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor) +{ + return garrow_tensor_new_raw_buffer(arrow_tensor, nullptr); +} + +GArrowTensor * +garrow_tensor_new_raw_buffer(std::shared_ptr<arrow::Tensor> *arrow_tensor, + GArrowBuffer *buffer) +{ + auto tensor = GARROW_TENSOR(g_object_new(GARROW_TYPE_TENSOR, + "tensor", arrow_tensor, + "buffer", buffer, + NULL)); + return tensor; +} + +std::shared_ptr<arrow::Tensor> +garrow_tensor_get_raw(GArrowTensor *tensor) +{ + auto priv = GARROW_TENSOR_GET_PRIVATE(tensor); + return priv->tensor; +} diff --git a/src/arrow/c_glib/arrow-glib/tensor.h b/src/arrow/c_glib/arrow-glib/tensor.h new file mode 100644 index 000000000..daa3a8905 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/tensor.h @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/buffer.h> +#include <arrow-glib/data-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_TENSOR (garrow_tensor_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowTensor, + garrow_tensor, + GARROW, + TENSOR, + GObject) +struct _GArrowTensorClass +{ + GObjectClass parent_class; +}; + +GArrowTensor *garrow_tensor_new (GArrowDataType *data_type, + GArrowBuffer *data, + gint64 *shape, + gsize n_dimensions, + gint64 *strides, + gsize n_strides, + gchar **dimension_names, + gsize n_dimension_names); +gboolean garrow_tensor_equal (GArrowTensor *tensor, + GArrowTensor *other_tensor); +GArrowDataType *garrow_tensor_get_value_data_type(GArrowTensor *tensor); +GArrowType garrow_tensor_get_value_type (GArrowTensor *tensor); +GArrowBuffer *garrow_tensor_get_buffer (GArrowTensor *tensor); +gint64 *garrow_tensor_get_shape (GArrowTensor *tensor, + gint *n_dimensions); +gint64 *garrow_tensor_get_strides (GArrowTensor *tensor, + gint *n_strides); +gint garrow_tensor_get_n_dimensions (GArrowTensor *tensor); +const gchar *garrow_tensor_get_dimension_name (GArrowTensor *tensor, + gint i); +gint64 garrow_tensor_get_size (GArrowTensor *tensor); +gboolean garrow_tensor_is_mutable (GArrowTensor *tensor); +gboolean garrow_tensor_is_contiguous (GArrowTensor *tensor); +gboolean garrow_tensor_is_row_major (GArrowTensor *tensor); +gboolean garrow_tensor_is_column_major (GArrowTensor *tensor); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/tensor.hpp b/src/arrow/c_glib/arrow-glib/tensor.hpp new file mode 100644 index 000000000..c90dc6d4d --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/tensor.hpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/tensor.h> + +GArrowTensor *garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor); +GArrowTensor *garrow_tensor_new_raw_buffer(std::shared_ptr<arrow::Tensor> *arrow_tensor, + GArrowBuffer *buffer); +std::shared_ptr<arrow::Tensor> garrow_tensor_get_raw(GArrowTensor *tensor); diff --git a/src/arrow/c_glib/arrow-glib/type.cpp b/src/arrow/c_glib/arrow-glib/type.cpp new file mode 100644 index 000000000..3f978108f --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/type.cpp @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <arrow-glib/type.hpp> + +/** + * SECTION: type + * @title: GArrowType + * @short_description: Type mapping between Arrow and arrow-glib + * + * #GArrowType provides types corresponding to `arrow::Type::type` + * values. + */ + +/* Maps an arrow::Type::type value to the matching #GArrowType. + * Values without an explicit case fall back to GARROW_TYPE_NA. */ +GArrowType +garrow_type_from_raw(arrow::Type::type type) +{ + switch (type) { + case arrow::Type::type::NA: + return GARROW_TYPE_NA; + case arrow::Type::type::BOOL: + return GARROW_TYPE_BOOLEAN; + case arrow::Type::type::UINT8: + return GARROW_TYPE_UINT8; + case arrow::Type::type::INT8: + return GARROW_TYPE_INT8; + case arrow::Type::type::UINT16: + return GARROW_TYPE_UINT16; + case arrow::Type::type::INT16: + return GARROW_TYPE_INT16; + case arrow::Type::type::UINT32: + return GARROW_TYPE_UINT32; + case arrow::Type::type::INT32: + return GARROW_TYPE_INT32; + case arrow::Type::type::UINT64: + return GARROW_TYPE_UINT64; + case arrow::Type::type::INT64: + return GARROW_TYPE_INT64; + case arrow::Type::type::HALF_FLOAT: + return GARROW_TYPE_HALF_FLOAT; + case arrow::Type::type::FLOAT: + return GARROW_TYPE_FLOAT; + case arrow::Type::type::DOUBLE: + return GARROW_TYPE_DOUBLE; + case arrow::Type::type::STRING: + return GARROW_TYPE_STRING; + case arrow::Type::type::LARGE_STRING: + return GARROW_TYPE_LARGE_STRING; + case arrow::Type::type::BINARY: + return GARROW_TYPE_BINARY; + case arrow::Type::type::LARGE_BINARY: + return GARROW_TYPE_LARGE_BINARY; + case arrow::Type::type::FIXED_SIZE_BINARY: + return GARROW_TYPE_FIXED_SIZE_BINARY; + case arrow::Type::type::DATE32: + return GARROW_TYPE_DATE32; + case arrow::Type::type::DATE64: + return GARROW_TYPE_DATE64; + case arrow::Type::type::TIMESTAMP: + return GARROW_TYPE_TIMESTAMP; + case arrow::Type::type::TIME32: + return GARROW_TYPE_TIME32; + case arrow::Type::type::TIME64: + return GARROW_TYPE_TIME64; + case arrow::Type::type::INTERVAL_MONTHS: + return GARROW_TYPE_INTERVAL_MONTHS; + case arrow::Type::type::INTERVAL_DAY_TIME: + return GARROW_TYPE_INTERVAL_DAY_TIME; + case
arrow::Type::type::DECIMAL128: + return GARROW_TYPE_DECIMAL128; + case arrow::Type::type::DECIMAL256: + return GARROW_TYPE_DECIMAL256; + case arrow::Type::type::LIST: + return GARROW_TYPE_LIST; + case arrow::Type::type::LARGE_LIST: + return GARROW_TYPE_LARGE_LIST; + case arrow::Type::type::STRUCT: + return GARROW_TYPE_STRUCT; + case arrow::Type::type::MAP: + return GARROW_TYPE_MAP; + case arrow::Type::type::EXTENSION: + return GARROW_TYPE_EXTENSION; + case arrow::Type::type::SPARSE_UNION: + return GARROW_TYPE_SPARSE_UNION; + case arrow::Type::type::DENSE_UNION: + return GARROW_TYPE_DENSE_UNION; + case arrow::Type::type::DICTIONARY: + return GARROW_TYPE_DICTIONARY; + /* GArrowType declares these two values, so map them explicitly + * instead of letting them degrade to GARROW_TYPE_NA. */ + case arrow::Type::type::FIXED_SIZE_LIST: + return GARROW_TYPE_FIXED_SIZE_LIST; + case arrow::Type::type::DURATION: + return GARROW_TYPE_DURATION; + default: + return GARROW_TYPE_NA; + } +} + +/* Maps an arrow::TimeUnit::type value to the matching #GArrowTimeUnit. + * Unknown values fall back to GARROW_TIME_UNIT_SECOND. */ +GArrowTimeUnit +garrow_time_unit_from_raw(arrow::TimeUnit::type unit) +{ + switch (unit) { + case arrow::TimeUnit::type::SECOND: + return GARROW_TIME_UNIT_SECOND; + case arrow::TimeUnit::type::MILLI: + return GARROW_TIME_UNIT_MILLI; + case arrow::TimeUnit::type::MICRO: + return GARROW_TIME_UNIT_MICRO; + case arrow::TimeUnit::type::NANO: + return GARROW_TIME_UNIT_NANO; + default: + return GARROW_TIME_UNIT_SECOND; + } +} + +/* Maps a #GArrowTimeUnit value to the matching arrow::TimeUnit::type. + * Unknown values fall back to arrow::TimeUnit::type::SECOND. */ +arrow::TimeUnit::type +garrow_time_unit_to_raw(GArrowTimeUnit unit) +{ + switch (unit) { + case GARROW_TIME_UNIT_SECOND: + return arrow::TimeUnit::type::SECOND; + case GARROW_TIME_UNIT_MILLI: + return arrow::TimeUnit::type::MILLI; + case GARROW_TIME_UNIT_MICRO: + return arrow::TimeUnit::type::MICRO; + case GARROW_TIME_UNIT_NANO: + return arrow::TimeUnit::type::NANO; + default: + return arrow::TimeUnit::type::SECOND; + } +} diff --git a/src/arrow/c_glib/arrow-glib/type.h b/src/arrow/c_glib/arrow-glib/type.h new file mode 100644 index 000000000..456920dbb --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/type.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib-object.h> + +G_BEGIN_DECLS + +/** + * GArrowType: + * @GARROW_TYPE_NA: A degenerate NULL type represented as 0 bytes/bits. + * @GARROW_TYPE_BOOLEAN: A boolean value represented as 1-bit. + * @GARROW_TYPE_UINT8: Little-endian 8-bit unsigned integer. + * @GARROW_TYPE_INT8: Little-endian 8-bit signed integer. + * @GARROW_TYPE_UINT16: Little-endian 16-bit unsigned integer. + * @GARROW_TYPE_INT16: Little-endian 16-bit signed integer. + * @GARROW_TYPE_UINT32: Little-endian 32-bit unsigned integer. + * @GARROW_TYPE_INT32: Little-endian 32-bit signed integer. + * @GARROW_TYPE_UINT64: Little-endian 64-bit unsigned integer. + * @GARROW_TYPE_INT64: Little-endian 64-bit signed integer. + * @GARROW_TYPE_HALF_FLOAT: 2-byte floating point value. + * @GARROW_TYPE_FLOAT: 4-byte floating point value. + * @GARROW_TYPE_DOUBLE: 8-byte floating point value. + * @GARROW_TYPE_STRING: UTF-8 variable-length string. + * @GARROW_TYPE_BINARY: Variable-length bytes (no guarantee of UTF-8-ness). + * @GARROW_TYPE_FIXED_SIZE_BINARY: Fixed-size binary. Each value occupies + * the same number of bytes. + * @GARROW_TYPE_DATE32: int32 days since the UNIX epoch. + * @GARROW_TYPE_DATE64: int64 milliseconds since the UNIX epoch. 
+ * @GARROW_TYPE_TIMESTAMP: Exact timestamp encoded with int64 since UNIX epoch. + * The default unit is millisecond. + * @GARROW_TYPE_TIME32: Exact time encoded with int32, supporting seconds or milliseconds. + * @GARROW_TYPE_TIME64: Exact time encoded with int64, supporting micro- or nanoseconds. + * @GARROW_TYPE_INTERVAL_MONTHS: YEAR_MONTH interval in SQL style. + * @GARROW_TYPE_INTERVAL_DAY_TIME: DAY_TIME interval in SQL style. + * @GARROW_TYPE_DECIMAL128: Precision- and scale-based decimal + * type with 128 bits. Storage type depends on the parameters. + * @GARROW_TYPE_DECIMAL256: Precision- and scale-based decimal + * type with 256 bits. Storage type depends on the parameters. + * @GARROW_TYPE_LIST: A list of some logical data type. + * @GARROW_TYPE_STRUCT: Struct of logical types. + * @GARROW_TYPE_SPARSE_UNION: Sparse unions of logical types. + * @GARROW_TYPE_DENSE_UNION: Dense unions of logical types. + * @GARROW_TYPE_DICTIONARY: Dictionary aka Category type. + * @GARROW_TYPE_MAP: A repeated struct logical type. + * @GARROW_TYPE_EXTENSION: Custom data type, implemented by user. + * @GARROW_TYPE_FIXED_SIZE_LIST: Fixed size list of some logical type. + * @GARROW_TYPE_DURATION: Measure of elapsed time in either seconds, + * milliseconds, microseconds or nanoseconds. + * @GARROW_TYPE_LARGE_STRING: UTF-8 variable-length string with 64-bit offsets. + * @GARROW_TYPE_LARGE_BINARY: Variable-length bytes with 64-bit offsets (no guarantee of UTF-8-ness). + * @GARROW_TYPE_LARGE_LIST: A list of some logical data type with 64-bit offsets. + * + * They correspond to `arrow::Type::type` values.
+ */ +typedef enum { + GARROW_TYPE_NA, + GARROW_TYPE_BOOLEAN, + GARROW_TYPE_UINT8, + GARROW_TYPE_INT8, + GARROW_TYPE_UINT16, + GARROW_TYPE_INT16, + GARROW_TYPE_UINT32, + GARROW_TYPE_INT32, + GARROW_TYPE_UINT64, + GARROW_TYPE_INT64, + GARROW_TYPE_HALF_FLOAT, + GARROW_TYPE_FLOAT, + GARROW_TYPE_DOUBLE, + GARROW_TYPE_STRING, + GARROW_TYPE_BINARY, + GARROW_TYPE_FIXED_SIZE_BINARY, + GARROW_TYPE_DATE32, + GARROW_TYPE_DATE64, + GARROW_TYPE_TIMESTAMP, + GARROW_TYPE_TIME32, + GARROW_TYPE_TIME64, + GARROW_TYPE_INTERVAL_MONTHS, + GARROW_TYPE_INTERVAL_DAY_TIME, + GARROW_TYPE_DECIMAL128, + GARROW_TYPE_DECIMAL256, + GARROW_TYPE_LIST, + GARROW_TYPE_STRUCT, + GARROW_TYPE_SPARSE_UNION, + GARROW_TYPE_DENSE_UNION, + GARROW_TYPE_DICTIONARY, + GARROW_TYPE_MAP, + GARROW_TYPE_EXTENSION, + GARROW_TYPE_FIXED_SIZE_LIST, + GARROW_TYPE_DURATION, + GARROW_TYPE_LARGE_STRING, + GARROW_TYPE_LARGE_BINARY, + GARROW_TYPE_LARGE_LIST +} GArrowType; + +/** + * GArrowTimeUnit: + * @GARROW_TIME_UNIT_SECOND: Second. + * @GARROW_TIME_UNIT_MILLI: Millisecond. + * @GARROW_TIME_UNIT_MICRO: Microsecond. + * @GARROW_TIME_UNIT_NANO: Nanosecond. + * + * They are corresponding to `arrow::TimeUnit::type` values. + */ +typedef enum { + GARROW_TIME_UNIT_SECOND, + GARROW_TIME_UNIT_MILLI, + GARROW_TIME_UNIT_MICRO, + GARROW_TIME_UNIT_NANO +} GArrowTimeUnit; + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/type.hpp b/src/arrow/c_glib/arrow-glib/type.hpp new file mode 100644 index 000000000..6f6569712 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/type.hpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#include <arrow-glib/type.h> + +GArrowType garrow_type_from_raw(arrow::Type::type type); +GArrowTimeUnit garrow_time_unit_from_raw(arrow::TimeUnit::type unit); +arrow::TimeUnit::type garrow_time_unit_to_raw(GArrowTimeUnit unit); diff --git a/src/arrow/c_glib/arrow-glib/version.h.in b/src/arrow/c_glib/arrow-glib/version.h.in new file mode 100644 index 000000000..7b7174e66 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/version.h.in @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-glib/arrow-glib.h + * + * Apache Arrow GLib provides macros that can be used by C pre-processor. 
+ * They are useful to check version related things at compile time. + */ + +/** + * GARROW_VERSION_MAJOR: + * + * The major version. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_MAJOR (@GARROW_VERSION_MAJOR@) + +/** + * GARROW_VERSION_MINOR: + * + * The minor version. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_MINOR (@GARROW_VERSION_MINOR@) + +/** + * GARROW_VERSION_MICRO: + * + * The micro version. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_MICRO (@GARROW_VERSION_MICRO@) + +/** + * GARROW_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_TAG "@GARROW_VERSION_TAG@" + +/** + * GARROW_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_CHECK(major, minor, micro) \ + (GARROW_VERSION_MAJOR > (major) || \ + (GARROW_VERSION_MAJOR == (major) && \ + GARROW_VERSION_MINOR > (minor)) || \ + (GARROW_VERSION_MAJOR == (major) && \ + GARROW_VERSION_MINOR == (minor) && \ + GARROW_VERSION_MICRO >= (micro))) + +/** + * GARROW_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. 
+ * + * Since: 0.10.0 + */ + +#ifdef GARROW_DISABLE_DEPRECATION_WARNINGS +# define GARROW_DEPRECATED +# define GARROW_DEPRECATED_FOR(function) +# define GARROW_UNAVAILABLE(major, minor) +#else +# define GARROW_DEPRECATED G_DEPRECATED +# define GARROW_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +/** + * GARROW_VERSION_6_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 6.0.0 + */ +#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0) + +/** + * GARROW_VERSION_5_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 5.0.0 + */ +#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0) + +/** + * GARROW_VERSION_4_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 4.0.0 + */ +#define GARROW_VERSION_4_0 G_ENCODE_VERSION(4, 0) + +/** + * GARROW_VERSION_3_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 3.0.0 + */ +#define GARROW_VERSION_3_0 G_ENCODE_VERSION(3, 0) + +/** + * GARROW_VERSION_2_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 2.0.0 + */ +#define GARROW_VERSION_2_0 G_ENCODE_VERSION(2, 0) + +/** + * GARROW_VERSION_1_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 1.0.0 + */ +#define GARROW_VERSION_1_0 G_ENCODE_VERSION(1, 0) + +/** + * GARROW_VERSION_0_17: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.17.0 + */ +#define GARROW_VERSION_0_17 G_ENCODE_VERSION(0, 17) + +/** + * GARROW_VERSION_0_16: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.16.0 + */ +#define GARROW_VERSION_0_16 G_ENCODE_VERSION(0, 16) + +/** + * GARROW_VERSION_0_15: + * + * You can use this macro value for compile time API version check. 
+ * + * Since: 0.15.0 + */ +#define GARROW_VERSION_0_15 G_ENCODE_VERSION(0, 15) + +/** + * GARROW_VERSION_0_14: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.14.0 + */ +#define GARROW_VERSION_0_14 G_ENCODE_VERSION(0, 14) + +/** + * GARROW_VERSION_0_13: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.13.0 + */ +#define GARROW_VERSION_0_13 G_ENCODE_VERSION(0, 13) + +/** + * GARROW_VERSION_0_12: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.12.0 + */ +#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12) + +/** + * GARROW_VERSION_0_10: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.10.0 + */ +#define GARROW_VERSION_0_10 G_ENCODE_VERSION(0, 10) + +/** + * GARROW_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_VERSION_0_10. If it is not defined, it defaults to the + * major.minor version of the Apache Arrow GLib being compiled against. + * + * If you use any function that was deprecated in + * %GARROW_VERSION_MIN_REQUIRED or an earlier version, deprecation + * warnings are produced at compile time. Functions deprecated in a + * newer version do not produce warnings. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 0.10.0 + */ +#ifndef GARROW_VERSION_MIN_REQUIRED +# define GARROW_VERSION_MIN_REQUIRED \ + G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR) +#endif + +/** + * GARROW_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_VERSION_0_10. If it is not defined, it defaults to the + * major.minor version of the Apache Arrow GLib being compiled against. + * + * If you use any function that was introduced in a version newer than + * %GARROW_VERSION_MAX_ALLOWED, warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header.
+ * + * Since: 0.10.0 + */ +#ifndef GARROW_VERSION_MAX_ALLOWED +# define GARROW_VERSION_MAX_ALLOWED \ + G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR) +#endif + + +#define GARROW_AVAILABLE_IN_ALL + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0 +# define GARROW_DEPRECATED_IN_6_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_6_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_6_0 +# define GARROW_DEPRECATED_IN_6_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0 +# define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0) +#else +# define GARROW_AVAILABLE_IN_6_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0 +# define GARROW_DEPRECATED_IN_5_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_5_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_5_0 +# define GARROW_DEPRECATED_IN_5_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0 +# define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0) +#else +# define GARROW_AVAILABLE_IN_5_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0 +# define GARROW_DEPRECATED_IN_4_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_4_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_4_0 +# define GARROW_DEPRECATED_IN_4_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_4_0 +# define GARROW_AVAILABLE_IN_4_0 GARROW_UNAVAILABLE(4, 0) +#else +# define GARROW_AVAILABLE_IN_4_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_3_0 +# define GARROW_DEPRECATED_IN_3_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_3_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_3_0 +# define GARROW_DEPRECATED_IN_3_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_3_0 +# define GARROW_AVAILABLE_IN_3_0 GARROW_UNAVAILABLE(3, 0) +#else +# define 
GARROW_AVAILABLE_IN_3_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_2_0 +# define GARROW_DEPRECATED_IN_2_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_2_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_2_0 +# define GARROW_DEPRECATED_IN_2_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_2_0 +# define GARROW_AVAILABLE_IN_2_0 GARROW_UNAVAILABLE(2, 0) +#else +# define GARROW_AVAILABLE_IN_2_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_1_0 +# define GARROW_DEPRECATED_IN_1_0 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_1_0_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_1_0 +# define GARROW_DEPRECATED_IN_1_0_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_1_0 +# define GARROW_AVAILABLE_IN_1_0 GARROW_UNAVAILABLE(1, 0) +#else +# define GARROW_AVAILABLE_IN_1_0 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_17 +# define GARROW_DEPRECATED_IN_0_17 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_17_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_17 +# define GARROW_DEPRECATED_IN_0_17_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_17 +# define GARROW_AVAILABLE_IN_0_17 GARROW_UNAVAILABLE(0, 17) +#else +# define GARROW_AVAILABLE_IN_0_17 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_16 +# define GARROW_DEPRECATED_IN_0_16 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_16_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_16 +# define GARROW_DEPRECATED_IN_0_16_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_16 +# define GARROW_AVAILABLE_IN_0_16 GARROW_UNAVAILABLE(0, 16) +#else +# define GARROW_AVAILABLE_IN_0_16 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_15 +# define GARROW_DEPRECATED_IN_0_15 GARROW_DEPRECATED +# define 
GARROW_DEPRECATED_IN_0_15_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_15 +# define GARROW_DEPRECATED_IN_0_15_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_15 +# define GARROW_AVAILABLE_IN_0_15 GARROW_UNAVAILABLE(0, 15) +#else +# define GARROW_AVAILABLE_IN_0_15 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_14 +# define GARROW_DEPRECATED_IN_0_14 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_14_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_14 +# define GARROW_DEPRECATED_IN_0_14_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_14 +# define GARROW_AVAILABLE_IN_0_14 GARROW_UNAVAILABLE(0, 14) +#else +# define GARROW_AVAILABLE_IN_0_14 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_13 +# define GARROW_DEPRECATED_IN_0_13 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_13_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_13 +# define GARROW_DEPRECATED_IN_0_13_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_13 +# define GARROW_AVAILABLE_IN_0_13 GARROW_UNAVAILABLE(0, 13) +#else +# define GARROW_AVAILABLE_IN_0_13 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12 +# define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_12 +# define GARROW_DEPRECATED_IN_0_12_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12 +# define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12) +#else +# define GARROW_AVAILABLE_IN_0_12 +#endif + +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10 +# define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_10 +# define 
GARROW_DEPRECATED_IN_0_10_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_10 +# define GARROW_AVAILABLE_IN_0_10 GARROW_UNAVAILABLE(0, 10) +#else +# define GARROW_AVAILABLE_IN_0_10 +#endif diff --git a/src/arrow/c_glib/arrow-glib/writable-file.cpp b/src/arrow/c_glib/arrow-glib/writable-file.cpp new file mode 100644 index 000000000..74850ec20 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable-file.cpp @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow/api.h> + +#include <arrow-glib/error.hpp> +#include <arrow-glib/writable-file.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: writable-file + * @title: GArrowWritableFile + * @short_description: File output interface + * + * #GArrowWritableFile is an interface for file output. + */ + +G_DEFINE_INTERFACE(GArrowWritableFile, + garrow_writable_file, + G_TYPE_OBJECT) + +static void +garrow_writable_file_default_init(GArrowWritableFileInterface *iface) +{ +} + +/** + * garrow_writable_file_write_at: + * @writable_file: A #GArrowWritableFile. + * @position: The write start position. + * @data: (array length=n_bytes): The data to be written. + * @n_bytes: The number of bytes to be written. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + */ +gboolean +garrow_writable_file_write_at(GArrowWritableFile *writable_file, + gint64 position, + const guint8 *data, + gint64 n_bytes, + GError **error) +{ + const auto arrow_writable_file = + garrow_writable_file_get_raw(writable_file); + + auto status = arrow_writable_file->WriteAt(position, data, n_bytes); + return garrow_error_check(error, status, "[io][writable-file][write-at]"); +} + +G_END_DECLS + +std::shared_ptr<arrow::io::WritableFile> +garrow_writable_file_get_raw(GArrowWritableFile *writable_file) +{ + auto *iface = GARROW_WRITABLE_FILE_GET_IFACE(writable_file); + return iface->get_raw(writable_file); +} diff --git a/src/arrow/c_glib/arrow-glib/writable-file.h b/src/arrow/c_glib/arrow-glib/writable-file.h new file mode 100644 index 000000000..78359f6af --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable-file.h @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/gobject-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_WRITABLE_FILE (garrow_writable_file_get_type()) +G_DECLARE_INTERFACE(GArrowWritableFile, + garrow_writable_file, + GARROW, + WRITABLE_FILE, + GObject) + +gboolean garrow_writable_file_write_at(GArrowWritableFile *writable_file, + gint64 position, + const guint8 *data, + gint64 n_bytes, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/writable-file.hpp b/src/arrow/c_glib/arrow-glib/writable-file.hpp new file mode 100644 index 000000000..c36e24c92 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable-file.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/io/interfaces.h> + +#include <arrow-glib/writable-file.h> + +/** + * GArrowWritableFile: + * + * It wraps `arrow::io::WritableFile`. 
+ */ +struct _GArrowWritableFileInterface +{ + GTypeInterface parent_iface; + + std::shared_ptr<arrow::io::WritableFile> (*get_raw)(GArrowWritableFile *file); +}; + +std::shared_ptr<arrow::io::WritableFile> +garrow_writable_file_get_raw(GArrowWritableFile *writable_file); diff --git a/src/arrow/c_glib/arrow-glib/writable.cpp b/src/arrow/c_glib/arrow-glib/writable.cpp new file mode 100644 index 000000000..47c1f3303 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable.cpp @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow/api.h> + +#include <arrow-glib/error.hpp> +#include <arrow-glib/writable.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: writable + * @title: GArrowWritable + * @short_description: Output interface + * + * #GArrowWritable is an interface for output. Output must be + * writable. + */ + +G_DEFINE_INTERFACE(GArrowWritable, + garrow_writable, + G_TYPE_OBJECT) + +static void +garrow_writable_default_init(GArrowWritableInterface *iface) +{ +} + +/** + * garrow_writable_write: + * @writable: A #GArrowWritable. + * @data: (array length=n_bytes): The data to be written. + * @n_bytes: The number of bytes to be written. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + */ +gboolean +garrow_writable_write(GArrowWritable *writable, + const guint8 *data, + gint64 n_bytes, + GError **error) +{ + const auto arrow_writable = garrow_writable_get_raw(writable); + + auto status = arrow_writable->Write(data, n_bytes); + return garrow_error_check(error, status, "[io][writable][write]"); +} + +/** + * garrow_writable_flush: + * @writable: A #GArrowWritable. + * @error: (nullable): Return location for a #GError or %NULL. + * + * It ensures writing all data on memory to storage. + * + * Returns: %TRUE on success, %FALSE if there was an error. + */ +gboolean +garrow_writable_flush(GArrowWritable *writable, + GError **error) +{ + const auto arrow_writable = garrow_writable_get_raw(writable); + + auto status = arrow_writable->Flush(); + return garrow_error_check(error, status, "[io][writable][flush]"); +} + +G_END_DECLS + +std::shared_ptr<arrow::io::Writable> +garrow_writable_get_raw(GArrowWritable *writable) +{ + auto *iface = GARROW_WRITABLE_GET_IFACE(writable); + return iface->get_raw(writable); +} diff --git a/src/arrow/c_glib/arrow-glib/writable.h b/src/arrow/c_glib/arrow-glib/writable.h new file mode 100644 index 000000000..151e85d98 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable.h @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/gobject-type.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_WRITABLE (garrow_writable_get_type()) +G_DECLARE_INTERFACE(GArrowWritable, + garrow_writable, + GARROW, + WRITABLE, + GObject) + +gboolean garrow_writable_write(GArrowWritable *writable, + const guint8 *data, + gint64 n_bytes, + GError **error); +gboolean garrow_writable_flush(GArrowWritable *writable, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/writable.hpp b/src/arrow/c_glib/arrow-glib/writable.hpp new file mode 100644 index 000000000..a80ef792b --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writable.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/io/interfaces.h> + +#include <arrow-glib/writable.h> + +/** + * GArrowWritableInterface: + * + * It wraps `arrow::io::Writable`. + */ +struct _GArrowWritableInterface +{ + GTypeInterface parent_iface; + + std::shared_ptr<arrow::io::Writable> (*get_raw)(GArrowWritable *file); +}; + +std::shared_ptr<arrow::io::Writable> +garrow_writable_get_raw(GArrowWritable *writable); diff --git a/src/arrow/c_glib/arrow-glib/writer.cpp b/src/arrow/c_glib/arrow-glib/writer.cpp new file mode 100644 index 000000000..380815017 --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writer.cpp @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/array.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> +#include <arrow-glib/table.hpp> + +#include <arrow-glib/output-stream.hpp> + +#include <arrow-glib/writer.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: writer + * @section_id: writer-classes + * @title: Writer classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowRecordBatchWriter is a base class for writing record batches + * in stream format into output. 
+ * + * #GArrowRecordBatchStreamWriter is a base class for writing record + * batches in stream format into output synchronously. + * + * #GArrowRecordBatchFileWriter is a class for writing record + * batches in file format into output. + */ + +typedef struct GArrowRecordBatchWriterPrivate_ { + std::shared_ptr<arrow::ipc::RecordBatchWriter> record_batch_writer; +} GArrowRecordBatchWriterPrivate; + +enum { + PROP_0, + PROP_RECORD_BATCH_WRITER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchWriter, + garrow_record_batch_writer, + G_TYPE_OBJECT); + +#define GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(obj) \ + static_cast<GArrowRecordBatchWriterPrivate *>( \ + garrow_record_batch_writer_get_instance_private( \ + GARROW_RECORD_BATCH_WRITER(obj))) + +static void +garrow_record_batch_writer_finalize(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(object); + + priv->record_batch_writer.~shared_ptr(); + + G_OBJECT_CLASS(garrow_record_batch_writer_parent_class)->finalize(object); +} + +static void +garrow_record_batch_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RECORD_BATCH_WRITER: + priv->record_batch_writer = + *static_cast<std::shared_ptr<arrow::ipc::RecordBatchWriter> *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_writer_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_writer_init(GArrowRecordBatchWriter *object) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(object); + new(&priv->record_batch_writer) std::shared_ptr<arrow::ipc::RecordBatchWriter>; +} + +static void 
+garrow_record_batch_writer_class_init(GArrowRecordBatchWriterClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_record_batch_writer_finalize; + gobject_class->set_property = garrow_record_batch_writer_set_property; + gobject_class->get_property = garrow_record_batch_writer_get_property; + + spec = g_param_spec_pointer("record-batch-writer", + "arrow::ipc::RecordBatchWriter", + "The raw std::shared<arrow::ipc::RecordBatchWriter> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER, spec); +} + +/** + * garrow_record_batch_writer_write_record_batch: + * @writer: A #GArrowRecordBatchWriter. + * @record_batch: The record batch to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.4.0 + */ +gboolean +garrow_record_batch_writer_write_record_batch(GArrowRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GError **error) +{ + auto arrow_writer = garrow_record_batch_writer_get_raw(writer); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_record_batch_raw = arrow_record_batch.get(); + + auto status = arrow_writer->WriteRecordBatch(*arrow_record_batch_raw); + return garrow_error_check(error, + status, + "[record-batch-writer][write-record-batch]"); +} + +/** + * garrow_record_batch_writer_write_table: + * @writer: A #GArrowRecordBatchWriter. + * @table: The table to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.8.0 + */ +gboolean +garrow_record_batch_writer_write_table(GArrowRecordBatchWriter *writer, + GArrowTable *table, + GError **error) +{ + auto arrow_writer = garrow_record_batch_writer_get_raw(writer); + auto arrow_table = garrow_table_get_raw(table); + + auto status = arrow_writer->WriteTable(*arrow_table); + return garrow_error_check(error, + status, + "[record-batch-writer][write-table]"); +} + +/** + * garrow_record_batch_writer_close: + * @writer: A #GArrowRecordBatchWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.4.0 + */ +gboolean +garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, + GError **error) +{ + auto arrow_writer = garrow_record_batch_writer_get_raw(writer); + + auto status = arrow_writer->Close(); + return garrow_error_check(error, status, "[record-batch-writer][close]"); +} + + +G_DEFINE_TYPE(GArrowRecordBatchStreamWriter, + garrow_record_batch_stream_writer, + GARROW_TYPE_RECORD_BATCH_WRITER); + +static void +garrow_record_batch_stream_writer_init(GArrowRecordBatchStreamWriter *object) +{ +} + +static void +garrow_record_batch_stream_writer_class_init(GArrowRecordBatchStreamWriterClass *klass) +{ +} + +/** + * garrow_record_batch_stream_writer_new: + * @sink: The output of the writer. + * @schema: The schema of the writer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowRecordBatchStreamWriter + * or %NULL on error. 
+ * + * Since: 0.4.0 + */ +GArrowRecordBatchStreamWriter * +garrow_record_batch_stream_writer_new(GArrowOutputStream *sink, + GArrowSchema *schema, + GError **error) +{ + auto arrow_sink = garrow_output_stream_get_raw(sink); + auto arrow_schema = garrow_schema_get_raw(schema); + auto arrow_writer_result = + arrow::ipc::MakeStreamWriter(arrow_sink, arrow_schema); + if (garrow::check(error, + arrow_writer_result, + "[record-batch-stream-writer][open]")) { + auto arrow_writer = *arrow_writer_result; + return garrow_record_batch_stream_writer_new_raw(&arrow_writer); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GArrowRecordBatchFileWriter, + garrow_record_batch_file_writer, + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER); + +static void +garrow_record_batch_file_writer_init(GArrowRecordBatchFileWriter *object) +{ +} + +static void +garrow_record_batch_file_writer_class_init(GArrowRecordBatchFileWriterClass *klass) +{ +} + +/** + * garrow_record_batch_file_writer_new: + * @sink: The output of the writer. + * @schema: The schema of the writer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowRecordBatchFileWriter + * or %NULL on error. 
+ * + * Since: 0.4.0 + */ +GArrowRecordBatchFileWriter * +garrow_record_batch_file_writer_new(GArrowOutputStream *sink, + GArrowSchema *schema, + GError **error) +{ + auto arrow_sink = garrow_output_stream_get_raw(sink); + auto arrow_schema = garrow_schema_get_raw(schema); + std::shared_ptr<arrow::ipc::RecordBatchWriter> arrow_writer; + auto arrow_writer_result = + arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema); + if (garrow::check(error, + arrow_writer_result, + "[record-batch-file-writer][open]")) { + auto arrow_writer = *arrow_writer_result; + return garrow_record_batch_file_writer_new_raw(&arrow_writer); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowRecordBatchWriter * +garrow_record_batch_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer) +{ + auto writer = + GARROW_RECORD_BATCH_WRITER( + g_object_new(GARROW_TYPE_RECORD_BATCH_WRITER, + "record-batch-writer", arrow_writer, + NULL)); + return writer; +} + +std::shared_ptr<arrow::ipc::RecordBatchWriter> +garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(writer); + return priv->record_batch_writer; +} + +GArrowRecordBatchStreamWriter * +garrow_record_batch_stream_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer) +{ + auto writer = + GARROW_RECORD_BATCH_STREAM_WRITER( + g_object_new(GARROW_TYPE_RECORD_BATCH_STREAM_WRITER, + "record-batch-writer", arrow_writer, + NULL)); + return writer; +} + +GArrowRecordBatchFileWriter * +garrow_record_batch_file_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer) +{ + auto writer = + GARROW_RECORD_BATCH_FILE_WRITER( + g_object_new(GARROW_TYPE_RECORD_BATCH_FILE_WRITER, + "record-batch-writer", arrow_writer, + NULL)); + return writer; +} diff --git a/src/arrow/c_glib/arrow-glib/writer.h b/src/arrow/c_glib/arrow-glib/writer.h new file mode 100644 index 000000000..a0d22fe62 --- /dev/null +++ 
b/src/arrow/c_glib/arrow-glib/writer.h @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/array.h> +#include <arrow-glib/record-batch.h> +#include <arrow-glib/schema.h> + +#include <arrow-glib/output-stream.h> + +G_BEGIN_DECLS + +#define GARROW_TYPE_RECORD_BATCH_WRITER \ + (garrow_record_batch_writer_get_type()) +#define GARROW_RECORD_BATCH_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_RECORD_BATCH_WRITER, \ + GArrowRecordBatchWriter)) +#define GARROW_RECORD_BATCH_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_RECORD_BATCH_WRITER, \ + GArrowRecordBatchWriterClass)) +#define GARROW_IS_RECORD_BATCH_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_RECORD_BATCH_WRITER)) +#define GARROW_IS_RECORD_BATCH_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_RECORD_BATCH_WRITER)) +#define GARROW_RECORD_BATCH_WRITER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_RECORD_BATCH_WRITER, \ + GArrowRecordBatchWriterClass)) + +typedef struct _GArrowRecordBatchWriter GArrowRecordBatchWriter; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowRecordBatchWriterClass 
GArrowRecordBatchWriterClass; +#endif + +/** + * GArrowRecordBatchWriter: + * + * It wraps `arrow::ipc::RecordBatchWriter`. + */ +struct _GArrowRecordBatchWriter +{ + /*< private >*/ + GObject parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowRecordBatchWriterClass +{ + GObjectClass parent_class; +}; +#endif + +GType garrow_record_batch_writer_get_type(void) G_GNUC_CONST; + +gboolean garrow_record_batch_writer_write_record_batch( + GArrowRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GError **error); +gboolean garrow_record_batch_writer_write_table( + GArrowRecordBatchWriter *writer, + GArrowTable *table, + GError **error); +gboolean garrow_record_batch_writer_close( + GArrowRecordBatchWriter *writer, + GError **error); + + +#define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ + (garrow_record_batch_stream_writer_get_type()) +#define GARROW_RECORD_BATCH_STREAM_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER, \ + GArrowRecordBatchStreamWriter)) +#define GARROW_RECORD_BATCH_STREAM_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER, \ + GArrowRecordBatchStreamWriterClass)) +#define GARROW_IS_RECORD_BATCH_STREAM_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER)) +#define GARROW_IS_RECORD_BATCH_STREAM_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER)) +#define GARROW_RECORD_BATCH_STREAM_WRITER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_RECORD_BATCH_STREAM_WRITER, \ + GArrowRecordBatchStreamWriterClass)) + +typedef struct _GArrowRecordBatchStreamWriter GArrowRecordBatchStreamWriter; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowRecordBatchStreamWriterClass GArrowRecordBatchStreamWriterClass; +#endif + +/** + * GArrowRecordBatchStreamWriter: + * + * It wraps `arrow::ipc::RecordBatchStreamWriter`. 
+ */ +struct _GArrowRecordBatchStreamWriter +{ + /*< private >*/ + GArrowRecordBatchWriter parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowRecordBatchStreamWriterClass +{ + GArrowRecordBatchWriterClass parent_class; +}; +#endif + +GType garrow_record_batch_stream_writer_get_type(void) G_GNUC_CONST; + +GArrowRecordBatchStreamWriter *garrow_record_batch_stream_writer_new( + GArrowOutputStream *sink, + GArrowSchema *schema, + GError **error); + + +#define GARROW_TYPE_RECORD_BATCH_FILE_WRITER \ + (garrow_record_batch_file_writer_get_type()) +#define GARROW_RECORD_BATCH_FILE_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_WRITER, \ + GArrowRecordBatchFileWriter)) +#define GARROW_RECORD_BATCH_FILE_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_RECORD_BATCH_FILE_WRITER, \ + GArrowRecordBatchFileWriterClass)) +#define GARROW_IS_RECORD_BATCH_FILE_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_WRITER)) +#define GARROW_IS_RECORD_BATCH_FILE_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_RECORD_BATCH_FILE_WRITER)) +#define GARROW_RECORD_BATCH_FILE_WRITER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_RECORD_BATCH_FILE_WRITER, \ + GArrowRecordBatchFileWriterClass)) + +typedef struct _GArrowRecordBatchFileWriter GArrowRecordBatchFileWriter; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowRecordBatchFileWriterClass GArrowRecordBatchFileWriterClass; +#endif + +/** + * GArrowRecordBatchFileWriter: + * + * It wraps `arrow::ipc::RecordBatchFileWriter`. 
+ */ +struct _GArrowRecordBatchFileWriter +{ + /*< private >*/ + GArrowRecordBatchStreamWriter parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowRecordBatchFileWriterClass +{ + GArrowRecordBatchStreamWriterClass parent_class; +}; +#endif + +GType garrow_record_batch_file_writer_get_type(void) G_GNUC_CONST; + +GArrowRecordBatchFileWriter *garrow_record_batch_file_writer_new( + GArrowOutputStream *sink, + GArrowSchema *schema, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/arrow-glib/writer.hpp b/src/arrow/c_glib/arrow-glib/writer.hpp new file mode 100644 index 000000000..1e188bd3c --- /dev/null +++ b/src/arrow/c_glib/arrow-glib/writer.hpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow/api.h> +#include <arrow/ipc/api.h> +#include <arrow/ipc/feather.h> + +#include <arrow-glib/writer.h> + +GArrowRecordBatchWriter *garrow_record_batch_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); +std::shared_ptr<arrow::ipc::RecordBatchWriter> garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer); + +GArrowRecordBatchStreamWriter *garrow_record_batch_stream_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); + +GArrowRecordBatchFileWriter *garrow_record_batch_file_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); diff --git a/src/arrow/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml b/src/arrow/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml new file mode 100644 index 000000000..b13195b07 --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml @@ -0,0 +1,88 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. + --> + </releaseinfo> + </bookinfo> + + <part id="data"> + <title>Data</title> + <chapter id="source"> + <title>Partitioning</title> + <xi:include href="xml/partitioning.xml"/> + <title>Dataset</title> + <xi:include href="xml/dataset.xml"/> + <title>Dataset factory</title> + <xi:include href="xml/dataset-factory.xml"/> + </chapter> + <chapter id="read"> + <title>Scan</title> + <xi:include href="xml/scanner.xml"/> + <title>Fragment</title> + <xi:include href="xml/fragment.xml"/> + <title>File format</title> + <xi:include href="xml/file-format.xml"/> + </chapter> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-6-0-0" role="6.0.0"> + <title>Index of new symbols in 6.0.0</title> + <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-5-0-0" role="5.0.0"> + <title>Index of new symbols in 5.0.0</title> + <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback 
/></xi:include> + </index> + <index id="api-index-4-0-0" role="4.0.0"> + <title>Index of new symbols in 4.0.0</title> + <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-3-0-0" role="3.0.0"> + <title>Index of new symbols in 3.0.0</title> + <xi:include href="xml/api-index-3.0.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/doc/arrow-dataset-glib/entities.xml.in b/src/arrow/c_glib/doc/arrow-dataset-glib/entities.xml.in new file mode 100644 index 000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-dataset-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/arrow-dataset-glib/meson.build b/src/arrow/c_glib/doc/arrow-dataset-glib/meson.build new file mode 100644 index 000000000..ca037b7e3 --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-dataset-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +package_id = 'arrow-dataset-glib' +package_name = 'Apache Arrow Dataset GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_id, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + arrow_glib, + arrow_dataset_glib, +] +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GARROW_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=gadataset', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, + '--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml b/src/arrow/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml new file mode 100644 index 000000000..397a8bec0 --- /dev/null 
+++ b/src/arrow/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml @@ -0,0 +1,67 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. 
+ --> + </releaseinfo> + </bookinfo> + + <part id="rpc"> + <title>RPC</title> + <xi:include href="xml/common.xml"/> + <xi:include href="xml/client.xml"/> + <xi:include href="xml/server.xml"/> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-6-0-0" role="6.0.0"> + <title>Index of new symbols in 6.0.0</title> + <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-5-0-0" role="5.0.0"> + <title>Index of new symbols in 5.0.0</title> + <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/doc/arrow-flight-glib/entities.xml.in b/src/arrow/c_glib/doc/arrow-flight-glib/entities.xml.in new file mode 100644 index 000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-flight-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/arrow-flight-glib/meson.build b/src/arrow/c_glib/doc/arrow-flight-glib/meson.build new file mode 100644 index 000000000..7ae38e4f5 --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-flight-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +package_id = 'arrow-flight-glib' +package_name = 'Apache Arrow Flight GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_id, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + arrow_glib, + arrow_flight_glib, +] +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GARROW_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=gaflight', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, + '--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/src/arrow/c_glib/doc/arrow-glib/arrow-glib-docs.xml new file mode 100644 index 000000000..43f6a7edc --- /dev/null +++
b/src/arrow/c_glib/doc/arrow-glib/arrow-glib-docs.xml @@ -0,0 +1,272 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. 
+ --> + </releaseinfo> + </bookinfo> + + <part id="data"> + <title>Data</title> + <chapter id="array"> + <title>Array</title> + <xi:include href="xml/basic-array.xml"/> + <xi:include href="xml/composite-array.xml"/> + </chapter> + <chapter id="array-builder"> + <title>Array builder</title> + <xi:include href="xml/array-builder.xml"/> + </chapter> + <chapter id="tensor"> + <title>Tensor</title> + <xi:include href="xml/tensor.xml"/> + </chapter> + <chapter id="value"> + <title>Value</title> + <xi:include href="xml/decimal.xml"/> + </chapter> + <chapter id="scalar"> + <title>Scalar</title> + <xi:include href="xml/scalar.xml"/> + </chapter> + <chapter id="type"> + <title>Type</title> + <xi:include href="xml/type.xml"/> + <xi:include href="xml/basic-data-type.xml"/> + <xi:include href="xml/composite-data-type.xml"/> + </chapter> + <chapter id="schema"> + <title>Schema</title> + <xi:include href="xml/field.xml"/> + <xi:include href="xml/schema.xml"/> + </chapter> + <chapter id="table"> + <title>Table</title> + <xi:include href="xml/table.xml"/> + <xi:include href="xml/record-batch.xml"/> + <xi:include href="xml/chunked-array.xml"/> + </chapter> + <chapter id="table-builder"> + <title>Table builder</title> + <xi:include href="xml/table-builder.xml"/> + </chapter> + <chapter id="computation"> + <title>Computation</title> + <xi:include href="xml/compute.xml"/> + <xi:include href="xml/datum.xml"/> + <xi:include href="xml/expression.xml"/> + </chapter> + <chapter id="buffer"> + <title>Buffer</title> + <xi:include href="xml/buffer.xml"/> + </chapter> + <chapter id="codec"> + <title>Codec</title> + <xi:include href="xml/codec.xml"/> + </chapter> + <chapter id="error"> + <title>Error</title> + <xi:include href="xml/error.xml"/> + </chapter> + </part> + + <part id="file-system-api"> + <title>File system API</title> + <chapter id="file-system"> + <title>File system</title> + <xi:include href="xml/file-system.xml"/> + </chapter> + <chapter id="local-file-system"> + <title>Local 
file system</title> + <xi:include href="xml/local-file-system.xml"/> + </chapter> + </part> + + <part id="io"> + <title>IO</title> + <chapter id="mode"> + <title>Mode</title> + <xi:include href="xml/file-mode.xml"/> + </chapter> + <chapter id="input"> + <title>Input</title> + <xi:include href="xml/readable.xml"/> + <xi:include href="xml/input-stream.xml"/> + </chapter> + <chapter id="output"> + <title>Output</title> + <xi:include href="xml/writable.xml"/> + <xi:include href="xml/writable-file.xml"/> + <xi:include href="xml/output-stream.xml"/> + </chapter> + <chapter id="input-output"> + <title>Input and output</title> + <xi:include href="xml/file.xml"/> + </chapter> + </part> + + <part id="ipc"> + <title>IPC</title> + <chapter id="metadata"> + <title>Metadata</title> + <xi:include href="xml/metadata-version.xml"/> + </chapter> + <chapter id="ipc-options"> + <title>Options</title> + <xi:include href="xml/ipc-options.xml"/> + </chapter> + <chapter id="reader"> + <title>Reader</title> + <xi:include href="xml/reader.xml"/> + <xi:include href="xml/orc-file-reader.xml"><xi:fallback /></xi:include> + </chapter> + <chapter id="writer"> + <title>Writer</title> + <xi:include href="xml/writer.xml"/> + </chapter> + </part> + + <part id="gpu"> + <title>GPU</title> + <chapter id="cuda"> + <title>CUDA</title> + <xi:include href="xml/cuda.xml"><xi:fallback /></xi:include> + </chapter> + </part> + + <part id="misc"> + <title>Misc</title> + <chapter id="version"> + <title>Version</title> + <xi:include href="xml/version.xml"></xi:include> + </chapter> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback 
/></xi:include> + </index> + <index id="api-index-6-0-0" role="6.0.0"> + <title>Index of new symbols in 6.0.0</title> + <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-5-0-0" role="5.0.0"> + <title>Index of new symbols in 5.0.0</title> + <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-4-0-0" role="4.0.0"> + <title>Index of new symbols in 4.0.0</title> + <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-3-0-0" role="3.0.0"> + <title>Index of new symbols in 3.0.0</title> + <xi:include href="xml/api-index-3.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-2-0-0" role="2.0.0"> + <title>Index of new symbols in 2.0.0</title> + <xi:include href="xml/api-index-2.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-1-0-0" role="1.0.0"> + <title>Index of new symbols in 1.0.0</title> + <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-17-0" role="0.17.0"> + <title>Index of new symbols in 0.17.0</title> + <xi:include href="xml/api-index-0.17.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-16-0" role="0.16.0"> + <title>Index of new symbols in 0.16.0</title> + <xi:include href="xml/api-index-0.16.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-15-0" role="0.15.0"> + <title>Index of new symbols in 0.15.0</title> + <xi:include href="xml/api-index-0.15.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-14-0" role="0.14.0"> + <title>Index of new symbols in 0.14.0</title> + <xi:include href="xml/api-index-0.14.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-13-0" role="0.13.0"> + <title>Index of new symbols in 0.13.0</title> + <xi:include href="xml/api-index-0.13.0.xml"><xi:fallback /></xi:include> + </index> + <index 
id="api-index-0-12-0" role="0.12.0"> + <title>Index of new symbols in 0.12.0</title> + <xi:include href="xml/api-index-0.12.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-11-0" role="0.11.0"> + <title>Index of new symbols in 0.11.0</title> + <xi:include href="xml/api-index-0.11.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-10-0" role="0.10.0"> + <title>Index of new symbols in 0.10.0</title> + <xi:include href="xml/api-index-0.10.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-9-0" role="0.9.0"> + <title>Index of new symbols in 0.9.0</title> + <xi:include href="xml/api-index-0.9.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-8-0" role="0.8.0"> + <title>Index of new symbols in 0.8.0</title> + <xi:include href="xml/api-index-0.8.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-7-0" role="0.7.0"> + <title>Index of new symbols in 0.7.0</title> + <xi:include href="xml/api-index-0.7.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-6-0" role="0.6.0"> + <title>Index of new symbols in 0.6.0</title> + <xi:include href="xml/api-index-0.6.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-5-0" role="0.5.0"> + <title>Index of new symbols in 0.5.0</title> + <xi:include href="xml/api-index-0.5.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-4-0" role="0.4.0"> + <title>Index of new symbols in 0.4.0</title> + <xi:include href="xml/api-index-0.4.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-3-0" role="0.3.0"> + <title>Index of new symbols in 0.3.0</title> + <xi:include href="xml/api-index-0.3.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/doc/arrow-glib/entities.xml.in b/src/arrow/c_glib/doc/arrow-glib/entities.xml.in new file mode 100644 index 
000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/arrow-glib/meson.build b/src/arrow/c_glib/doc/arrow-glib/meson.build new file mode 100644 index 000000000..eeb2fd85d --- /dev/null +++ b/src/arrow/c_glib/doc/arrow-glib/meson.build @@ -0,0 +1,93 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package_id = meson.project_name() +package_name = 'Apache Arrow GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_name, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + arrow_glib, +] +if arrow_cuda.found() + source_directories += [ + join_paths(meson.source_root(), 'arrow-cuda-glib'), + join_paths(meson.build_root(), 'arrow-cuda-glib'), + ] + dependencies += [ + arrow_cuda_glib, + ] +endif +ignore_headers = [ + join_paths(meson.source_root(), 'arrow-glib', 'gobject-type.h'), +] +if not have_arrow_orc + ignore_headers += [ + join_paths(meson.source_root(), 'arrow-glib', 'orc-file-reader.h'), + ] +endif +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: 
ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GARROW_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=garrow', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, + '--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/gandiva-glib/entities.xml.in b/src/arrow/c_glib/doc/gandiva-glib/entities.xml.in new file mode 100644 index 000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/gandiva-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml b/src/arrow/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml new file mode 100644 index 000000000..182bbfb52 --- /dev/null +++ b/src/arrow/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml @@ -0,0 +1,128 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. 
+ --> + </releaseinfo> + </bookinfo> + + <part id="evaluate-expression"> + <title>Evaluate Expression</title> + <chapter id="expression"> + <title>Expression</title> + <xi:include href="xml/expression.xml"/> + </chapter> + <chapter id="filter"> + <title>Filter</title> + <xi:include href="xml/filter.xml"/> + </chapter> + <chapter id="selection-vector"> + <title>Selection vector</title> + <xi:include href="xml/selection-vector.xml"/> + </chapter> + <chapter id="projector"> + <title>Projector</title> + <xi:include href="xml/projector.xml"/> + </chapter> + </part> + + <part id="expression-tree"> + <title>Expression Tree</title> + <chapter id="node"> + <title>Node</title> + <xi:include href="xml/node.xml"/> + </chapter> + </part> + + <part id="function"> + <title>Function</title> + <chapter id="function-registry"> + <title>Registry</title> + <xi:include href="xml/function-registry.xml"/> + </chapter> + <chapter id="function-signature"> + <title>Signature</title> + <xi:include href="xml/function-signature.xml"/> + </chapter> + <chapter id="native-function"> + <title>Native function</title> + <xi:include href="xml/native-function.xml"/> + </chapter> + </part> + + <part id="misc"> + <title>Misc</title> + <chapter id="version"> + <title>Version</title> + <xi:include href="xml/version.xml"></xi:include> + </chapter> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-4-0-0" role="4.0.0"> + <title>Index of new symbols in 4.0.0</title> + <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-1-0-0" 
role="1.0.0"> + <title>Index of new symbols in 1.0.0</title> + <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-17-0" role="0.17.0"> + <title>Index of new symbols in 0.17.0</title> + <xi:include href="xml/api-index-0.17.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-15-0" role="0.15.0"> + <title>Index of new symbols in 0.15.0</title> + <xi:include href="xml/api-index-0.15.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-14-0" role="0.14.0"> + <title>Index of new symbols in 0.14.0</title> + <xi:include href="xml/api-index-0.14.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-12-0" role="0.12.0"> + <title>Index of new symbols in 0.12.0</title> + <xi:include href="xml/api-index-0.12.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/doc/gandiva-glib/meson.build b/src/arrow/c_glib/doc/gandiva-glib/meson.build new file mode 100644 index 000000000..7ff815f42 --- /dev/null +++ b/src/arrow/c_glib/doc/gandiva-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +package_id = 'gandiva-glib' +package_name = 'Gandiva GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_name, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + arrow_glib, + gandiva_glib, +] +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GGANDIVA_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=ggandiva', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, + '--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/parquet-glib/entities.xml.in b/src/arrow/c_glib/doc/parquet-glib/entities.xml.in new file mode 
100644 index 000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/parquet-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/parquet-glib/meson.build b/src/arrow/c_glib/doc/parquet-glib/meson.build new file mode 100644 index 000000000..f4ee794d0 --- /dev/null +++ b/src/arrow/c_glib/doc/parquet-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package_id = 'parquet-glib' +package_name = 'Apache Parquet GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/PARQUET') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_name, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + parquet_glib, + arrow_glib, +] +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + # NOTE(review): package-specific guard, following the G<PACKAGE>_DISABLE_DEPRECATED + # pattern of the gandiva/plasma docs; confirm the parquet-glib headers use this macro. + '--deprecated-guards=GPARQUET_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=gparquet', + '--source-suffixes=c,cpp,h', + ], + fixxref_args: [ + '--html-dir=' + doc_path, + 
'--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/parquet-glib/parquet-glib-docs.xml b/src/arrow/c_glib/doc/parquet-glib/parquet-glib-docs.xml new file mode 100644 index 000000000..0d42a7d5b --- /dev/null +++ b/src/arrow/c_glib/doc/parquet-glib/parquet-glib-docs.xml @@ -0,0 +1,85 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. 
+ --> + </releaseinfo> + </bookinfo> + + <part id="arrow"> + <title>Arrow</title> + <chapter id="io"> + <title>IO</title> + <xi:include href="xml/arrow-file-reader.xml"/> + <xi:include href="xml/arrow-file-writer.xml"/> + </chapter> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-6-0-0" role="6.0.0"> + <title>Index of new symbols in 6.0.0</title> + <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-1-0-0" role="1.0.0"> + <title>Index of new symbols in 1.0.0</title> + <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-17-0" role="0.17.0"> + <title>Index of new symbols in 0.17.0</title> + <xi:include href="xml/api-index-0.17.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-15-0" role="0.15.0"> + <title>Index of new symbols in 0.15.0</title> + <xi:include href="xml/api-index-0.15.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-12-0" role="0.12.0"> + <title>Index of new symbols in 0.12.0</title> + <xi:include href="xml/api-index-0.12.0.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-11-0" role="0.11.0"> + <title>Index of new symbols in 0.11.0</title> + <xi:include href="xml/api-index-0.11.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/doc/plasma-glib/entities.xml.in b/src/arrow/c_glib/doc/plasma-glib/entities.xml.in new file mode 100644 index 
000000000..aa5addb4e --- /dev/null +++ b/src/arrow/c_glib/doc/plasma-glib/entities.xml.in @@ -0,0 +1,24 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!ENTITY package "@PACKAGE@"> +<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@"> +<!ENTITY package_name "@PACKAGE_NAME@"> +<!ENTITY package_string "@PACKAGE_STRING@"> +<!ENTITY package_url "@PACKAGE_URL@"> +<!ENTITY package_version "@PACKAGE_VERSION@"> diff --git a/src/arrow/c_glib/doc/plasma-glib/meson.build b/src/arrow/c_glib/doc/plasma-glib/meson.build new file mode 100644 index 000000000..6a6de060f --- /dev/null +++ b/src/arrow/c_glib/doc/plasma-glib/meson.build @@ -0,0 +1,86 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package_id = 'plasma-glib' +package_name = 'Plasma GLib' +entities_conf = configuration_data() +entities_conf.set('PACKAGE', package_id) +entities_conf.set('PACKAGE_BUGREPORT', + 'https://issues.apache.org/jira/browse/ARROW') +entities_conf.set('PACKAGE_NAME', package_name) +entities_conf.set('PACKAGE_STRING', + ' '.join([package_name, version])) +entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/') +entities_conf.set('PACKAGE_VERSION', version) +configure_file(input: 'entities.xml.in', + output: 'entities.xml', + configuration: entities_conf) + +private_headers = [ +] + +content_files = [ +] + +html_images = [ +] + +glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix') +glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html') +arrow_glib_doc_path = join_paths(data_dir, + 'gtk-doc', + 'html', + 'arrow-glib') +doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id) + +source_directories = [ + join_paths(meson.source_root(), package_id), + join_paths(meson.build_root(), package_id), +] +dependencies = [ + plasma_glib, + arrow_glib, +] +if arrow_cuda.found() + dependencies += [arrow_cuda_glib] +endif +ignore_headers = [] +gnome.gtkdoc(package_id, + main_xml: package_id + '-docs.xml', + src_dir: source_directories, + dependencies: dependencies, + ignore_headers: ignore_headers, + gobject_typesfile: package_id + '.types', + scan_args: [ + '--rebuild-types', + '--deprecated-guards=GPLASMA_DISABLE_DEPRECATED', + ], + mkdb_args: [ + '--output-format=xml', + '--name-space=gplasma', + '--source-suffixes=c,cpp,h', + ], + 
fixxref_args: [ + '--html-dir=' + doc_path, + '--extra-dir=' + join_paths(glib_doc_path, 'glib'), + '--extra-dir=' + join_paths(glib_doc_path, 'gobject'), + '--extra-dir=' + arrow_glib_doc_path, + ], + html_assets: html_images, + install: true) diff --git a/src/arrow/c_glib/doc/plasma-glib/plasma-glib-docs.xml b/src/arrow/c_glib/doc/plasma-glib/plasma-glib-docs.xml new file mode 100644 index 000000000..83d3aea9b --- /dev/null +++ b/src/arrow/c_glib/doc/plasma-glib/plasma-glib-docs.xml @@ -0,0 +1,68 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" + "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" +[ + <!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> + <!ENTITY % gtkdocentities SYSTEM "entities.xml"> + %gtkdocentities; +]> +<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude"> + <bookinfo> + <title>&package_name; Reference Manual</title> + <releaseinfo> + for &package_string;. + <!-- + The latest version of this documentation can be found on-line at + <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>. 
+ --> + </releaseinfo> + </bookinfo> + + <part id="client-side"> + <title>Client side</title> + <chapter id="client"> + <title>Client</title> + <xi:include href="xml/client.xml"/> + </chapter> + <chapter id="object"> + <title>Object</title> + <xi:include href="xml/object.xml"/> + </chapter> + </part> + + <chapter id="object-tree"> + <title>Object Hierarchy</title> + <xi:include href="xml/tree_index.sgml"/> + </chapter> + <index id="api-index-full"> + <title>API Index</title> + <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include> + </index> + <index id="deprecated-api-index" role="deprecated"> + <title>Index of deprecated API</title> + <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include> + </index> + <index id="api-index-0-12-0" role="0.12.0"> + <title>Index of new symbols in 0.12.0</title> + <xi:include href="xml/api-index-0.12.0.xml"><xi:fallback /></xi:include> + </index> + <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> +</book> diff --git a/src/arrow/c_glib/example/README.md b/src/arrow/c_glib/example/README.md new file mode 100644 index 000000000..b69145d68 --- /dev/null +++ b/src/arrow/c_glib/example/README.md @@ -0,0 +1,48 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Arrow GLib example + +There are example codes in this directory. + +C example codes exist in this directory. Language bindings example +codes exist in subdirectories. For example, Lua example codes exist +in the `lua/` subdirectory. + +## C example codes + +Here are example codes in this directory: + + * `build.c`: It shows how to create an array with an array builder. + +<!--- + * `write-batch.c`: It shows how to write Arrow array to file in batch + mode. +--> + + * `read-batch.c`: It shows how to read Arrow array from file in batch + mode. + +<!--- + * `write-stream.c`: It shows how to write Arrow array to file in + stream mode. +--> + + * `read-stream.c`: It shows how to read Arrow array from file in + stream mode. diff --git a/src/arrow/c_glib/example/build.c b/src/arrow/c_glib/example/build.c new file mode 100644 index 000000000..9b2d58d2b --- /dev/null +++ b/src/arrow/c_glib/example/build.c @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
+ */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +int +main(int argc, char **argv) +{ + GArrowArray *array; + + { + GArrowInt32ArrayBuilder *builder; + gboolean success = TRUE; + GError *error = NULL; + + builder = garrow_int32_array_builder_new(); + if (success) { + success = garrow_int32_array_builder_append_value(builder, 29, &error); + } + if (success) { + success = garrow_int32_array_builder_append_value(builder, 2929, &error); + } + if (success) { + success = garrow_int32_array_builder_append_value(builder, 292929, &error); + } + if (!success) { + g_print("failed to append: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + return EXIT_FAILURE; + } + array = garrow_array_builder_finish(GARROW_ARRAY_BUILDER(builder), &error); + if (!array) { + g_print("failed to finish: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + return EXIT_FAILURE; + } + g_object_unref(builder); + } + + { + gint64 i, n; + + n = garrow_array_get_length(array); + g_print("length: %" G_GINT64_FORMAT "\n", n); + for (i = 0; i < n; i++) { + gint32 value; + + value = garrow_int32_array_get_value(GARROW_INT32_ARRAY(array), i); + g_print("array[%" G_GINT64_FORMAT "] = %d\n", + i, value); + } + } + + g_object_unref(array); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/extension-type.c b/src/arrow/c_glib/example/extension-type.c new file mode 100644 index 000000000..a23fa427d --- /dev/null +++ b/src/arrow/c_glib/example/extension-type.c @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +#define EXAMPLE_TYPE_UUID_ARRAY (example_uuid_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(ExampleUUIDArray, + example_uuid_array, + EXAMPLE, + UUID_ARRAY, + GArrowExtensionArray) +struct _ExampleUUIDArrayClass +{ + GArrowExtensionArrayClass parent_class; +}; + +G_DEFINE_TYPE(ExampleUUIDArray, + example_uuid_array, + GARROW_TYPE_EXTENSION_ARRAY) + +static void +example_uuid_array_init(ExampleUUIDArray *object) +{ +} + +static void +example_uuid_array_class_init(ExampleUUIDArrayClass *klass) +{ +} + + +#define EXAMPLE_TYPE_UUID_DATA_TYPE (example_uuid_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(ExampleUUIDDataType, + example_uuid_data_type, + EXAMPLE, + UUID_DATA_TYPE, + GArrowExtensionDataType) +struct _ExampleUUIDDataTypeClass +{ + GArrowExtensionDataTypeClass parent_class; +}; + + +G_DEFINE_TYPE(ExampleUUIDDataType, + example_uuid_data_type, + GARROW_TYPE_EXTENSION_DATA_TYPE) + +static gchar * +example_uuid_data_type_get_extension_name(GArrowExtensionDataType *data_type) +{ + return g_strdup("uuid"); +} + +static gboolean +example_uuid_data_type_equal(GArrowExtensionDataType *data_type, + GArrowExtensionDataType *other_data_type) +{ + /* Compare parameters if they exists. 
*/ + return TRUE; +} + +static const gchar *example_uuid_data_type_serialize_id = "uuid-serialized"; +static ExampleUUIDDataType *example_uuid_data_type_new(void); + +static GArrowDataType * +example_uuid_data_type_deserialize(GArrowExtensionDataType *data_type, + GArrowDataType *storage_data_type, + GBytes *serialized_data, + GError **error) +{ + gsize raw_data_size; + gconstpointer raw_data = g_bytes_get_data(serialized_data, &raw_data_size); + if (!(raw_data_size == strlen(example_uuid_data_type_serialize_id) && + strncmp(raw_data, + example_uuid_data_type_serialize_id, + raw_data_size) == 0)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[uuid-data-type][deserialize] " + "identifier must be <%s>: <%.*s>", + example_uuid_data_type_serialize_id, + (gint)raw_data_size, + (const gchar *)raw_data); + return NULL; + } + + GArrowDataType *expected_storage_data_type; + g_object_get(data_type, + "storage-data-type", &expected_storage_data_type, + NULL); + if (!garrow_data_type_equal(storage_data_type, + expected_storage_data_type)) { + gchar *expected = garrow_data_type_to_string(expected_storage_data_type); + gchar *actual = garrow_data_type_to_string(storage_data_type); + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[uuid-data-type][deserialize] " + "storage data type must be <%s>: <%s>", + expected, + actual); + g_free(actual); + g_free(expected); + return NULL; + } + + return GARROW_DATA_TYPE(example_uuid_data_type_new()); +} + +static GBytes * +example_uuid_data_type_serialize(GArrowExtensionDataType *data_type) +{ + return g_bytes_new_static(example_uuid_data_type_serialize_id, + strlen(example_uuid_data_type_serialize_id)); +} + +static GType +example_uuid_data_type_get_array_gtype(GArrowExtensionDataType *data_type) +{ + return EXAMPLE_TYPE_UUID_ARRAY; +} + +static void +example_uuid_data_type_init(ExampleUUIDDataType *object) +{ +} + +static void +example_uuid_data_type_class_init(ExampleUUIDDataTypeClass *klass) +{ + 
GArrowExtensionDataTypeClass *extension_klass = + GARROW_EXTENSION_DATA_TYPE_CLASS(klass); + extension_klass->get_extension_name = + example_uuid_data_type_get_extension_name; + extension_klass->equal = example_uuid_data_type_equal; + extension_klass->deserialize = example_uuid_data_type_deserialize; + extension_klass->serialize = example_uuid_data_type_serialize; + extension_klass->get_array_gtype = example_uuid_data_type_get_array_gtype; +} + +static ExampleUUIDDataType * +example_uuid_data_type_new(void) +{ + GArrowFixedSizeBinaryDataType *storage_data_type = + garrow_fixed_size_binary_data_type_new(16); + return g_object_new(EXAMPLE_TYPE_UUID_DATA_TYPE, + "storage-data-type", storage_data_type, + NULL); +} + + +int +main(int argc, char **argv) +{ + GArrowExtensionDataTypeRegistry *registry = + garrow_extension_data_type_registry_default(); + + /* Create UUID extension data type. */ + ExampleUUIDDataType *uuid_data_type = example_uuid_data_type_new(); + GArrowExtensionDataType *extension_data_type = + GARROW_EXTENSION_DATA_TYPE(uuid_data_type); + /* Register the created UUID extension data type. */ + GError *error = NULL; + if (!garrow_extension_data_type_registry_register(registry, + extension_data_type, + &error)) { + g_print("failed to register: %s\n", error->message); + g_error_free(error); + g_object_unref(registry); + return EXIT_FAILURE; + } + + { + /* Build storage data for the created UUID extension data type. 
*/ + GArrowFixedSizeBinaryDataType *storage_data_type; + g_object_get(extension_data_type, + "storage-data-type", &storage_data_type, + NULL); + GArrowFixedSizeBinaryArrayBuilder *builder = + garrow_fixed_size_binary_array_builder_new(storage_data_type); + g_object_unref(storage_data_type); + garrow_fixed_size_binary_array_builder_append_value( + builder, + (const guint8 *)"0123456789012345", + 16, + &error); + if (!error) { + garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), &error); + } + if (!error) { + garrow_fixed_size_binary_array_builder_append_value( + builder, + (const guint8 *)"abcdefghijklmnop", + 16, + &error); + } + if (error) { + g_print("failed to append elements: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + goto exit; + } + GArrowArray *storage = + garrow_array_builder_finish(GARROW_ARRAY_BUILDER(builder), &error); + g_object_unref(builder); + if (error) { + g_print("failed to build storage: %s\n", error->message); + g_error_free(error); + goto exit; + } + + /* Wrap the created storage data as the created UUID extension array. */ + GArrowExtensionArray *extension_array = + garrow_extension_data_type_wrap_array(extension_data_type, + storage); + g_object_unref(storage); + gint64 n_rows = garrow_array_get_length(GARROW_ARRAY(extension_array)); + + /* Create a record batch to serialize the created UUID extension array. 
*/ + GList *fields = NULL; + fields = g_list_append(fields, + garrow_field_new("uuid", + GARROW_DATA_TYPE(uuid_data_type))); + GArrowSchema *schema = garrow_schema_new(fields); + g_list_free_full(fields, g_object_unref); + GList *columns = NULL; + columns = g_list_append(columns, extension_array); + GArrowRecordBatch *record_batch = + garrow_record_batch_new(schema, n_rows, columns, &error); + g_list_free_full(columns, g_object_unref); + if (error) { + g_print("failed to create record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(schema); + goto exit; + } + + /* Serialize the created record batch. */ + GArrowResizableBuffer *buffer = garrow_resizable_buffer_new(0, &error); + if (error) { + g_print("failed to create buffer: %s\n", error->message); + g_error_free(error); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + { + GArrowBufferOutputStream *output = + garrow_buffer_output_stream_new(buffer); + GArrowRecordBatchStreamWriter *writer = + garrow_record_batch_stream_writer_new(GARROW_OUTPUT_STREAM(output), + schema, + &error); + if (error) { + g_print("failed to create writer: %s\n", error->message); + g_error_free(error); + g_object_unref(output); + g_object_unref(buffer); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + garrow_record_batch_writer_write_record_batch( + GARROW_RECORD_BATCH_WRITER(writer), + record_batch, + &error); + if (error) { + g_print("failed to write record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(writer); + g_object_unref(output); + g_object_unref(buffer); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + g_object_unref(schema); + g_object_unref(record_batch); + garrow_record_batch_writer_close(GARROW_RECORD_BATCH_WRITER(writer), + &error); + g_object_unref(writer); + g_object_unref(output); + if (error) { + g_print("failed to close writer: %s\n", error->message); + g_error_free(error); + 
g_object_unref(buffer); + goto exit; + } + } + + /* Deserialize the serialized record batch. */ + { + GArrowBufferInputStream *input = + garrow_buffer_input_stream_new(GARROW_BUFFER(buffer)); + GArrowRecordBatchStreamReader *reader = + garrow_record_batch_stream_reader_new(GARROW_INPUT_STREAM(input), + &error); + if (error) { + g_print("failed to create reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + g_object_unref(buffer); + goto exit; + } + record_batch = + garrow_record_batch_reader_read_next(GARROW_RECORD_BATCH_READER(reader), + &error); + if (error) { + g_print("failed to read record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + g_object_unref(buffer); + goto exit; + } + /* Show the deserialize record batch. */ + gchar *record_batch_content = + garrow_record_batch_to_string(record_batch, + &error); + if (error) { + g_print("failed to dump record batch content: %s\n", error->message); + g_error_free(error); + error = NULL; + } else { + g_print("record batch:\n%s\n", record_batch_content); + } + /* Get the deserialize UUID extension array. */ + GArrowArray *deserialized_array = + garrow_record_batch_get_column_data(record_batch, 0); + g_print("array: %s\n", G_OBJECT_TYPE_NAME(deserialized_array)); + g_object_unref(deserialized_array); + + g_object_unref(record_batch); + g_object_unref(reader); + g_object_unref(input); + } + + g_object_unref(buffer); + } + +exit: + /* Unregister the created UUID extension data type. 
*/ + { + gchar *data_type_name = + garrow_extension_data_type_get_extension_name(extension_data_type); + gboolean success = + garrow_extension_data_type_registry_unregister(registry, + data_type_name, + &error); + g_free(data_type_name); + if (!success) { + g_print("failed to unregister: %s\n", error->message); + g_error_free(error); + g_object_unref(registry); + return EXIT_FAILURE; + } + } + + g_object_unref(registry); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/lua/README.md b/src/arrow/c_glib/example/lua/README.md new file mode 100644 index 000000000..7d388d46a --- /dev/null +++ b/src/arrow/c_glib/example/lua/README.md @@ -0,0 +1,50 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Arrow Lua example + +There are Lua example codes in this directory. + +## How to run + +All example codes use [LGI](https://github.com/pavouk/lgi) to use +Arrow GLib based bindings. + +Here are command lines to install LGI on Debian GNU/Linux and Ubuntu: + +```text +% sudo apt install -y luarocks +% sudo luarocks install lgi +``` + +## Lua example codes + +Here are example codes in this directory: + + * `write-batch.lua`: It shows how to write Arrow array to file in + batch mode. 
+ + * `read-batch.lua`: It shows how to read Arrow array from file in + batch mode. + + * `write-stream.lua`: It shows how to write Arrow array to file in + stream mode. + + * `read-stream.lua`: It shows how to read Arrow array from file in + stream mode. diff --git a/src/arrow/c_glib/example/lua/meson.build b/src/arrow/c_glib/example/lua/meson.build new file mode 100644 index 000000000..8fe3e5f23 --- /dev/null +++ b/src/arrow/c_glib/example/lua/meson.build @@ -0,0 +1,28 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_data('README.md', + 'read-batch.lua', + 'read-stream.lua', + 'write-batch.lua', + 'write-stream.lua', + install_dir: join_paths(data_dir, + meson.project_name(), + 'example', + 'lua')) diff --git a/src/arrow/c_glib/example/lua/read-batch.lua b/src/arrow/c_glib/example/lua/read-batch.lua new file mode 100644 index 000000000..a4c86763f --- /dev/null +++ b/src/arrow/c_glib/example/lua/read-batch.lua @@ -0,0 +1,44 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Prints every record batch stored in an Arrow file, one column per
+-- output line. The path comes from the first command line argument
+-- and falls back to /tmp/batch.arrow.
+
+local lgi = require 'lgi'
+local Arrow = lgi.Arrow
+
+local input_path = arg[1] or "/tmp/batch.arrow"
+
+local input = Arrow.MemoryMappedInputStream.new(input_path)
+local reader = Arrow.RecordBatchFileReader.new(input)
+
+local last_batch = reader:get_n_record_batches() - 1
+for batch_index = 0, last_batch do
+  local record_batch = reader:read_record_batch(batch_index)
+  print(string.rep("=", 40))
+  print("record-batch["..batch_index.."]:")
+  local last_column = record_batch:get_n_columns() - 1
+  for column_index = 0, last_column do
+    local column_name = record_batch:get_column_name(column_index)
+    local column_data = record_batch:get_column_data(column_index)
+    io.write(" "..column_name..": [")
+    -- Empty before the first value, ", " before every later one.
+    local separator = ""
+    for row_index = 0, record_batch:get_n_rows() - 1 do
+      io.write(separator)
+      io.write(column_data:get_value(row_index))
+      separator = ", "
+    end
+    print("]")
+  end
+end
+
+input:close()
diff --git a/src/arrow/c_glib/example/lua/read-stream.lua b/src/arrow/c_glib/example/lua/read-stream.lua
new file mode 100644
index 000000000..7bf1083e2
--- /dev/null
+++ b/src/arrow/c_glib/example/lua/read-stream.lua
@@ -0,0 +1,51 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.
You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Prints record batches read from an Arrow stream file until the
+-- reader has no more batches to return. The path comes from the first
+-- command line argument and falls back to /tmp/stream.arrow.
+
+local lgi = require 'lgi'
+local Arrow = lgi.Arrow
+
+local input_path = arg[1] or "/tmp/stream.arrow"
+
+local input = Arrow.MemoryMappedInputStream.new(input_path)
+local reader = Arrow.RecordBatchStreamReader.new(input)
+
+local batch_index = 0
+local record_batch = reader:read_next()
+while record_batch do
+  print(string.rep("=", 40))
+  print("record-batch["..batch_index.."]:")
+  local last_column = record_batch:get_n_columns() - 1
+  for column_index = 0, last_column do
+    local column_name = record_batch:get_column_name(column_index)
+    local column_data = record_batch:get_column_data(column_index)
+    io.write(" "..column_name..": [")
+    -- Empty before the first value, ", " before every later one.
+    local separator = ""
+    for row_index = 0, record_batch:get_n_rows() - 1 do
+      io.write(separator)
+      io.write(column_data:get_value(row_index))
+      separator = ", "
+    end
+    print("]")
+  end
+
+  batch_index = batch_index + 1
+  record_batch = reader:read_next()
+end
+
+input:close()
diff --git a/src/arrow/c_glib/example/lua/write-batch.lua b/src/arrow/c_glib/example/lua/write-batch.lua
new file mode 100644
index 000000000..26acc1dff
--- /dev/null
+++ b/src/arrow/c_glib/example/lua/write-batch.lua
@@ -0,0 +1,74 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.
You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+local lgi = require 'lgi'
+local Arrow = lgi.Arrow
+
+local output_path = arg[1] or "/tmp/batch.arrow";
+
+local fields = {
+  Arrow.Field.new("uint8", Arrow.UInt8DataType.new()),
+  Arrow.Field.new("uint16", Arrow.UInt16DataType.new()),
+  Arrow.Field.new("uint32", Arrow.UInt32DataType.new()),
+  Arrow.Field.new("uint64", Arrow.UInt64DataType.new()),
+  Arrow.Field.new("int8", Arrow.Int8DataType.new()),
+  Arrow.Field.new("int16", Arrow.Int16DataType.new()),
+  Arrow.Field.new("int32", Arrow.Int32DataType.new()),
+  Arrow.Field.new("int64", Arrow.Int64DataType.new()),
+  Arrow.Field.new("float", Arrow.FloatDataType.new()),
+  Arrow.Field.new("double", Arrow.DoubleDataType.new()),
+}
+local schema = Arrow.Schema.new(fields)
+
+local output = Arrow.FileOutputStream.new(output_path, false)
+local writer = Arrow.RecordBatchFileWriter.new(output, schema)
+
+-- Appends each element of the sequence `values` to `builder`, in index
+-- order, and returns whatever `builder:finish()` returns.
+--
+-- `ipairs` is used rather than `pairs` because the traversal order of
+-- `pairs` is unspecified, even for numeric indices; the array contents
+-- must follow the order of `values`.
+function build_array(builder, values)
+  for _, value in ipairs(values) do
+    builder:append(value)
+  end
+  return builder:finish()
+end
+
+local uints = {1, 2, 4, 8}
+local ints = {1, -2, 4, -8}
+local floats = {1.1, -2.2, 4.4, -8.8}
+local columns = {
+  build_array(Arrow.UInt8ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt16ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt32ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt64ArrayBuilder.new(), uints),
+  build_array(Arrow.Int8ArrayBuilder.new(), ints),
+  build_array(Arrow.Int16ArrayBuilder.new(), ints),
+  build_array(Arrow.Int32ArrayBuilder.new(), ints),
+  build_array(Arrow.Int64ArrayBuilder.new(), ints),
+
build_array(Arrow.FloatArrayBuilder.new(), floats), + build_array(Arrow.DoubleArrayBuilder.new(), floats), +} + +local record_batch = Arrow.RecordBatch.new(schema, 4, columns) +writer:write_record_batch(record_batch) + +local sliced_columns = {} +for i, column in pairs(columns) do + sliced_columns[i] = column:slice(1, 3) +end +record_batch = Arrow.RecordBatch.new(schema, 3, sliced_columns) +writer:write_record_batch(record_batch) + +writer:close() +output:close() diff --git a/src/arrow/c_glib/example/lua/write-stream.lua b/src/arrow/c_glib/example/lua/write-stream.lua new file mode 100644 index 000000000..07bbd79af --- /dev/null +++ b/src/arrow/c_glib/example/lua/write-stream.lua @@ -0,0 +1,74 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+
+local lgi = require 'lgi'
+local Arrow = lgi.Arrow
+
+local output_path = arg[1] or "/tmp/stream.arrow";
+
+local fields = {
+  Arrow.Field.new("uint8", Arrow.UInt8DataType.new()),
+  Arrow.Field.new("uint16", Arrow.UInt16DataType.new()),
+  Arrow.Field.new("uint32", Arrow.UInt32DataType.new()),
+  Arrow.Field.new("uint64", Arrow.UInt64DataType.new()),
+  Arrow.Field.new("int8", Arrow.Int8DataType.new()),
+  Arrow.Field.new("int16", Arrow.Int16DataType.new()),
+  Arrow.Field.new("int32", Arrow.Int32DataType.new()),
+  Arrow.Field.new("int64", Arrow.Int64DataType.new()),
+  Arrow.Field.new("float", Arrow.FloatDataType.new()),
+  Arrow.Field.new("double", Arrow.DoubleDataType.new()),
+}
+local schema = Arrow.Schema.new(fields)
+
+local output = Arrow.FileOutputStream.new(output_path, false)
+local writer = Arrow.RecordBatchStreamWriter.new(output, schema)
+
+-- Appends each element of the sequence `values` to `builder`, in index
+-- order, and returns whatever `builder:finish()` returns.
+--
+-- `ipairs` is used rather than `pairs` because the traversal order of
+-- `pairs` is unspecified, even for numeric indices; the array contents
+-- must follow the order of `values`.
+function build_array(builder, values)
+  for _, value in ipairs(values) do
+    builder:append(value)
+  end
+  return builder:finish()
+end
+
+local uints = {1, 2, 4, 8}
+local ints = {1, -2, 4, -8}
+local floats = {1.1, -2.2, 4.4, -8.8}
+local columns = {
+  build_array(Arrow.UInt8ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt16ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt32ArrayBuilder.new(), uints),
+  build_array(Arrow.UInt64ArrayBuilder.new(), uints),
+  build_array(Arrow.Int8ArrayBuilder.new(), ints),
+  build_array(Arrow.Int16ArrayBuilder.new(), ints),
+  build_array(Arrow.Int32ArrayBuilder.new(), ints),
+  build_array(Arrow.Int64ArrayBuilder.new(), ints),
+  build_array(Arrow.FloatArrayBuilder.new(), floats),
+  build_array(Arrow.DoubleArrayBuilder.new(), floats),
+}
+
+local record_batch = Arrow.RecordBatch.new(schema, 4, columns)
+writer:write_record_batch(record_batch)
+
+local sliced_columns = {}
+for i, column in pairs(columns) do
+  sliced_columns[i] = column:slice(1, 3)
+end
+record_batch = Arrow.RecordBatch.new(schema, 3, sliced_columns)
+writer:write_record_batch(record_batch)
+
+writer:close()
+output:close() diff --git a/src/arrow/c_glib/example/meson.build b/src/arrow/c_glib/example/meson.build new file mode 100644 index 000000000..9a9bef1bd --- /dev/null +++ b/src/arrow/c_glib/example/meson.build @@ -0,0 +1,36 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +executable('build', 'build.c', + dependencies: [arrow_glib]) +executable('extension-type', 'extension-type.c', + dependencies: [arrow_glib]) +executable('read-batch', 'read-batch.c', + dependencies: [arrow_glib]) +executable('read-stream', 'read-stream.c', + dependencies: [arrow_glib]) + +install_data('README.md', + 'build.c', + 'extension-type.c', + 'read-batch.c', + 'read-stream.c', + install_dir: join_paths(data_dir, meson.project_name(), 'example')) + +subdir('lua') diff --git a/src/arrow/c_glib/example/read-batch.c b/src/arrow/c_glib/example/read-batch.c new file mode 100644 index 000000000..273dc70ff --- /dev/null +++ b/src/arrow/c_glib/example/read-batch.c @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +static void +print_array(GArrowArray *array) +{ + GArrowType value_type; + gint64 i, n; + + value_type = garrow_array_get_value_type(array); + + g_print("["); + n = garrow_array_get_length(array); + +#define ARRAY_CASE(type, Type, TYPE, format) \ + case GARROW_TYPE_ ## TYPE: \ + { \ + GArrow ## Type ## Array *real_array; \ + real_array = GARROW_ ## TYPE ## _ARRAY(array); \ + for (i = 0; i < n; i++) { \ + if (i > 0) { \ + g_print(", "); \ + } \ + g_print(format, \ + garrow_ ## type ## _array_get_value(real_array, i)); \ + } \ + } \ + break + + switch (value_type) { + ARRAY_CASE(uint8, UInt8, UINT8, "%hhu"); + ARRAY_CASE(uint16, UInt16, UINT16, "%" G_GUINT16_FORMAT); + ARRAY_CASE(uint32, UInt32, UINT32, "%" G_GUINT32_FORMAT); + ARRAY_CASE(uint64, UInt64, UINT64, "%" G_GUINT64_FORMAT); + ARRAY_CASE( int8, Int8, INT8, "%hhd"); + ARRAY_CASE( int16, Int16, INT16, "%" G_GINT16_FORMAT); + ARRAY_CASE( int32, Int32, INT32, "%" G_GINT32_FORMAT); + ARRAY_CASE( int64, Int64, INT64, "%" G_GINT64_FORMAT); + ARRAY_CASE( float, Float, FLOAT, "%g"); + ARRAY_CASE(double, Double, DOUBLE, "%g"); + default: + break; + } +#undef ARRAY_CASE + + g_print("]\n"); +} + +static void +print_record_batch(GArrowRecordBatch *record_batch) +{ + guint nth_column, n_columns; + + n_columns = garrow_record_batch_get_n_columns(record_batch); + for 
(nth_column = 0; nth_column < n_columns; nth_column++) { + GArrowArray *array; + + g_print("columns[%u](%s): ", + nth_column, + garrow_record_batch_get_column_name(record_batch, nth_column)); + array = garrow_record_batch_get_column_data(record_batch, nth_column); + print_array(array); + g_object_unref(array); + } +} + +int +main(int argc, char **argv) +{ + const char *input_path = "/tmp/batch.arrow"; + GArrowMemoryMappedInputStream *input; + GError *error = NULL; + + if (argc > 1) + input_path = argv[1]; + input = garrow_memory_mapped_input_stream_new(input_path, + &error); + if (!input) { + g_print("failed to open file: %s\n", error->message); + g_error_free(error); + return EXIT_FAILURE; + } + + { + GArrowRecordBatchFileReader *reader; + + reader = + garrow_record_batch_file_reader_new(GARROW_SEEKABLE_INPUT_STREAM(input), + &error); + if (!reader) { + g_print("failed to open file reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + return EXIT_FAILURE; + } + + { + guint i, n; + + n = garrow_record_batch_file_reader_get_n_record_batches(reader); + for (i = 0; i < n; i++) { + GArrowRecordBatch *record_batch; + + record_batch = + garrow_record_batch_file_reader_read_record_batch(reader, i, &error); + if (!record_batch) { + g_print("failed to open file reader: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + return EXIT_FAILURE; + } + + print_record_batch(record_batch); + g_object_unref(record_batch); + } + } + + g_object_unref(reader); + } + + g_object_unref(input); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/read-stream.c b/src/arrow/c_glib/example/read-stream.c new file mode 100644 index 000000000..133418faa --- /dev/null +++ b/src/arrow/c_glib/example/read-stream.c @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +static void +print_array(GArrowArray *array) +{ + GArrowType value_type; + gint64 i, n; + + value_type = garrow_array_get_value_type(array); + + g_print("["); + n = garrow_array_get_length(array); + +#define ARRAY_CASE(type, Type, TYPE, format) \ + case GARROW_TYPE_ ## TYPE: \ + { \ + GArrow ## Type ## Array *real_array; \ + real_array = GARROW_ ## TYPE ## _ARRAY(array); \ + for (i = 0; i < n; i++) { \ + if (i > 0) { \ + g_print(", "); \ + } \ + g_print(format, \ + garrow_ ## type ## _array_get_value(real_array, i)); \ + } \ + } \ + break + + switch (value_type) { + ARRAY_CASE(uint8, UInt8, UINT8, "%hhu"); + ARRAY_CASE(uint16, UInt16, UINT16, "%" G_GUINT16_FORMAT); + ARRAY_CASE(uint32, UInt32, UINT32, "%" G_GUINT32_FORMAT); + ARRAY_CASE(uint64, UInt64, UINT64, "%" G_GUINT64_FORMAT); + ARRAY_CASE( int8, Int8, INT8, "%hhd"); + ARRAY_CASE( int16, Int16, INT16, "%" G_GINT16_FORMAT); + ARRAY_CASE( int32, Int32, INT32, "%" G_GINT32_FORMAT); + ARRAY_CASE( int64, Int64, INT64, "%" G_GINT64_FORMAT); + ARRAY_CASE( float, Float, FLOAT, "%g"); + ARRAY_CASE(double, Double, DOUBLE, "%g"); + default: + break; + } +#undef ARRAY_CASE + + g_print("]\n"); +} + +static void +print_record_batch(GArrowRecordBatch *record_batch) 
+{ + guint nth_column, n_columns; + + n_columns = garrow_record_batch_get_n_columns(record_batch); + for (nth_column = 0; nth_column < n_columns; nth_column++) { + GArrowArray *array; + + g_print("columns[%u](%s): ", + nth_column, + garrow_record_batch_get_column_name(record_batch, nth_column)); + array = garrow_record_batch_get_column_data(record_batch, nth_column); + print_array(array); + g_object_unref(array); + } +} + +int +main(int argc, char **argv) +{ + const char *input_path = "/tmp/stream.arrow"; + GArrowMemoryMappedInputStream *input; + GError *error = NULL; + + if (argc > 1) + input_path = argv[1]; + input = garrow_memory_mapped_input_stream_new(input_path, &error); + if (!input) { + g_print("failed to open file: %s\n", error->message); + g_error_free(error); + return EXIT_FAILURE; + } + + { + GArrowRecordBatchReader *reader; + GArrowRecordBatchStreamReader *stream_reader; + + stream_reader = + garrow_record_batch_stream_reader_new(GARROW_INPUT_STREAM(input), + &error); + if (!stream_reader) { + g_print("failed to open stream reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + return EXIT_FAILURE; + } + + reader = GARROW_RECORD_BATCH_READER(stream_reader); + while (TRUE) { + GArrowRecordBatch *record_batch; + + record_batch = garrow_record_batch_reader_read_next(reader, &error); + if (error) { + g_print("failed to read the next record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + return EXIT_FAILURE; + } + + if (!record_batch) { + break; + } + + print_record_batch(record_batch); + g_object_unref(record_batch); + } + + g_object_unref(reader); + } + + g_object_unref(input); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/gandiva-glib/enums.c.template b/src/arrow/c_glib/gandiva-glib/enums.c.template new file mode 100644 index 000000000..7ea2ea7b5 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/enums.c.template @@ -0,0 +1,52 @@ +/*** BEGIN file-header ***/ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <gandiva-glib/gandiva-glib.h> +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType +@enum_name@_get_type(void) +{ + static GType etype = 0; + if (G_UNLIKELY(etype == 0)) { + static const G@Type@Value values[] = { +/*** END value-header ***/ + +/*** BEGIN value-production ***/ + {@VALUENAME@, "@VALUENAME@", "@valuenick@"}, +/*** END value-production ***/ + +/*** BEGIN value-tail ***/ + {0, NULL, NULL} + }; + etype = g_@type@_register_static(g_intern_static_string("@EnumName@"), values); + } + return etype; +} +/*** END value-tail ***/ + +/*** BEGIN file-tail ***/ +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/gandiva-glib/enums.h.template b/src/arrow/c_glib/gandiva-glib/enums.h.template new file mode 100644 index 000000000..8d7b46303 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/enums.h.template @@ -0,0 +1,41 @@ +/*** BEGIN file-header ***/ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib.h> + +G_BEGIN_DECLS +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType @enum_name@_get_type(void) G_GNUC_CONST; +#define @ENUMPREFIX@_TYPE_@ENUMSHORT@ (@enum_name@_get_type()) +/*** END value-header ***/ + +/*** BEGIN file-tail ***/ + +G_END_DECLS +/*** END file-tail ***/ diff --git a/src/arrow/c_glib/gandiva-glib/expression.cpp b/src/arrow/c_glib/gandiva-glib/expression.cpp new file mode 100644 index 000000000..e4368f84d --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/expression.cpp @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/field.hpp> + +#include <gandiva-glib/expression.hpp> +#include <gandiva-glib/node.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: expression + * @title: Expression classes + * @include: gandiva-glib/gandiva-glib.h + * + * #GGandivaExpression is a class for an expression tree with a root node, + * and a result field. + * + * #GGandivaCondition is a class for an expression that returns boolean. + * + * Since: 0.12.0 + */ + +typedef struct GGandivaExpressionPrivate_ { + std::shared_ptr<gandiva::Expression> expression; + GGandivaNode *root_node; + GArrowField *result_field; +} GGandivaExpressionPrivate; + +enum { + PROP_EXPRESSION = 1, + PROP_ROOT_NODE, + PROP_RESULT_FIELD +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaExpression, + ggandiva_expression, + G_TYPE_OBJECT) + +#define GGANDIVA_EXPRESSION_GET_PRIVATE(object) \ + static_cast<GGandivaExpressionPrivate *>( \ + ggandiva_expression_get_instance_private( \ + GGANDIVA_EXPRESSION(object))) + +static void +ggandiva_expression_dispose(GObject *object) +{ + auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(object); + + if (priv->root_node) { + g_object_unref(priv->root_node); + priv->root_node = nullptr; + } + + if (priv->result_field) { + g_object_unref(priv->result_field); + priv->result_field = nullptr; + } + + G_OBJECT_CLASS(ggandiva_expression_parent_class)->dispose(object); +} + +static void +ggandiva_expression_finalize(GObject *object) +{ + auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(object); + + priv->expression.~shared_ptr(); + + 
G_OBJECT_CLASS(ggandiva_expression_parent_class)->finalize(object);
+}
+
+/* Stores the construct-time properties in the private data. */
+static void
+ggandiva_expression_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_EXPRESSION:
+    /* The GValue carries a pointer to a shared_ptr; copy the shared_ptr. */
+    priv->expression =
+      *static_cast<std::shared_ptr<gandiva::Expression> *>(g_value_get_pointer(value));
+    break;
+  case PROP_ROOT_NODE:
+    priv->root_node = GGANDIVA_NODE(g_value_dup_object(value));
+    break;
+  case PROP_RESULT_FIELD:
+    priv->result_field = GARROW_FIELD(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Exposes "root-node" and "result-field"; "expression" is write-only. */
+static void
+ggandiva_expression_get_property(GObject *object,
+                                 guint prop_id,
+                                 GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_ROOT_NODE:
+    g_value_set_object(value, priv->root_node);
+    break;
+  case PROP_RESULT_FIELD:
+    g_value_set_object(value, priv->result_field);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+ggandiva_expression_init(GGandivaExpression *object)
+{
+  auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(object);
+  /* Construct the shared_ptr in place inside the private data. */
+  new(&priv->expression) std::shared_ptr<gandiva::Expression>;
+}
+
+/*
+ * Installs the GObject virtual functions and the three construct-only
+ * properties: "expression" (write-only raw pointer), "root-node" and
+ * "result-field".
+ */
+static void
+ggandiva_expression_class_init(GGandivaExpressionClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = ggandiva_expression_dispose;
+  gobject_class->finalize = ggandiva_expression_finalize;
+  gobject_class->set_property = ggandiva_expression_set_property;
+  gobject_class->get_property = ggandiva_expression_get_property;
+
+  GParamSpec *spec;
+  /* The blurb names the type that set_property casts the pointer to. */
+  spec = g_param_spec_pointer("expression",
+                              "Expression",
+                              "The raw std::shared_ptr<gandiva::Expression> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec);
+
+  spec = g_param_spec_object("root-node",
+                             "Root Node",
+                             "The root node for the expression",
+                             GGANDIVA_TYPE_NODE,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_ROOT_NODE, spec);
+
+  spec = g_param_spec_object("result-field",
+                             "Result Field",
+                             "The name and type of returned value as #GArrowField",
+                             GARROW_TYPE_FIELD,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_RESULT_FIELD, spec);
+}
+
+/**
+ * ggandiva_expression_new:
+ * @root_node: The root node for the expression.
+ * @result_field: The name and type of returned value as #GArrowField.
+ *
+ * Returns: A newly created #GGandivaExpression.
+ *
+ * Since: 0.12.0
+ */
+GGandivaExpression *
+ggandiva_expression_new(GGandivaNode *root_node,
+                        GArrowField *result_field)
+{
+  auto gandiva_root_node = ggandiva_node_get_raw(root_node);
+  auto arrow_result_field = garrow_field_get_raw(result_field);
+  auto gandiva_expression =
+    gandiva::TreeExprBuilder::MakeExpression(gandiva_root_node,
+                                             arrow_result_field);
+  return ggandiva_expression_new_raw(&gandiva_expression,
+                                     root_node,
+                                     result_field);
+}
+
+/**
+ * ggandiva_expression_to_string:
+ * @expression: A #GGandivaExpression.
+ *
+ * Returns: (transfer full): The string representation of the node in the expression tree.
+ *
+ * It should be freed with g_free() when no longer needed.
+ * + * Since: 0.12.0 + */ +gchar * +ggandiva_expression_to_string(GGandivaExpression *expression) +{ + auto gandiva_expression = ggandiva_expression_get_raw(expression); + auto string = gandiva_expression->ToString(); + return g_strndup(string.data(), string.size()); +} + + +G_DEFINE_TYPE(GGandivaCondition, + ggandiva_condition, + GGANDIVA_TYPE_EXPRESSION) + +static void +ggandiva_condition_init(GGandivaCondition *object) +{ +} + +static void +ggandiva_condition_class_init(GGandivaConditionClass *klass) +{ +} + +/** + * ggandiva_condition_new: + * @root_node: The root node for the condition. + * + * Returns: A newly created #GGandivaCondition. + * + * Since: 4.0.0 + */ +GGandivaCondition * +ggandiva_condition_new(GGandivaNode *root_node) +{ + auto gandiva_root_node = ggandiva_node_get_raw(root_node); + auto gandiva_condition = + gandiva::TreeExprBuilder::MakeCondition(gandiva_root_node); + return ggandiva_condition_new_raw(&gandiva_condition, + root_node); +} + + +G_END_DECLS + +GGandivaExpression * +ggandiva_expression_new_raw(std::shared_ptr<gandiva::Expression> *gandiva_expression, + GGandivaNode *root_node, + GArrowField *result_field) +{ + auto expression = g_object_new(GGANDIVA_TYPE_EXPRESSION, + "expression", gandiva_expression, + "root-node", root_node, + "result-field", result_field, + NULL); + return GGANDIVA_EXPRESSION(expression); +} + +std::shared_ptr<gandiva::Expression> +ggandiva_expression_get_raw(GGandivaExpression *expression) +{ + auto priv = GGANDIVA_EXPRESSION_GET_PRIVATE(expression); + return priv->expression; +} + + +GGandivaCondition * +ggandiva_condition_new_raw(std::shared_ptr<gandiva::Condition> *gandiva_condition, + GGandivaNode *root_node) +{ + auto arrow_result_field = (*gandiva_condition)->result(); + auto result_field = garrow_field_new_raw(&arrow_result_field, nullptr); + auto condition = g_object_new(GGANDIVA_TYPE_CONDITION, + "expression", gandiva_condition, + "root-node", root_node, + "result-field", result_field, + NULL); + 
return GGANDIVA_CONDITION(condition); +} + +std::shared_ptr<gandiva::Condition> +ggandiva_condition_get_raw(GGandivaCondition *condition) +{ + return std::static_pointer_cast<gandiva::Condition>( + ggandiva_expression_get_raw(GGANDIVA_EXPRESSION(condition))); +} diff --git a/src/arrow/c_glib/gandiva-glib/expression.h b/src/arrow/c_glib/gandiva-glib/expression.h new file mode 100644 index 000000000..0a720d9af --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/expression.h @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <gandiva-glib/node.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_EXPRESSION (ggandiva_expression_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaExpression, + ggandiva_expression, + GGANDIVA, + EXPRESSION, + GObject) + +struct _GGandivaExpressionClass +{ + GObjectClass parent_class; +}; + +GGandivaExpression * +ggandiva_expression_new(GGandivaNode *root_node, + GArrowField *result_field); +gchar *ggandiva_expression_to_string(GGandivaExpression *expression); + + +#define GGANDIVA_TYPE_CONDITION (ggandiva_condition_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaCondition, + ggandiva_condition, + GGANDIVA, + CONDITION, + GGandivaExpression) + +struct _GGandivaConditionClass +{ + GGandivaExpressionClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaCondition * +ggandiva_condition_new(GGandivaNode *root_node); + + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/expression.hpp b/src/arrow/c_glib/gandiva-glib/expression.hpp new file mode 100644 index 000000000..45b659393 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/expression.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <memory> + +#include <gandiva/expression.h> +#include <gandiva/tree_expr_builder.h> + +#include <gandiva-glib/expression.h> + +GGandivaExpression * +ggandiva_expression_new_raw(std::shared_ptr<gandiva::Expression> *gandiva_expression, + GGandivaNode *root_node, + GArrowField *result_field); +std::shared_ptr<gandiva::Expression> ggandiva_expression_get_raw(GGandivaExpression *expression); + +GGandivaCondition +*ggandiva_condition_new_raw(std::shared_ptr<gandiva::Condition> *gandiva_expression, + GGandivaNode *root_node); +std::shared_ptr<gandiva::Condition> +ggandiva_condition_get_raw(GGandivaCondition *condition); diff --git a/src/arrow/c_glib/gandiva-glib/filter.cpp b/src/arrow/c_glib/gandiva-glib/filter.cpp new file mode 100644 index 000000000..baed69946 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/filter.cpp @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <limits> + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <gandiva-glib/expression.hpp> +#include <gandiva-glib/filter.hpp> +#include <gandiva-glib/selection-vector.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: filter + * @title: Filter classes + * @include: gandiva-glib/gandiva-glib.h + * + * #GGandivaFilter is a class for selecting records by a specific + * condition. + * + * Since: 4.0.0 + */ + +typedef struct GGandivaFilterPrivate_ { + std::shared_ptr<gandiva::Filter> filter; + GArrowSchema *schema; + GGandivaCondition *condition; +} GGandivaFilterPrivate; + +enum { + PROP_FILTER = 1, + PROP_SCHEMA, + PROP_CONDITION, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaFilter, + ggandiva_filter, + G_TYPE_OBJECT) + +#define GGANDIVA_FILTER_GET_PRIVATE(obj) \ + static_cast<GGandivaFilterPrivate *>( \ + ggandiva_filter_get_instance_private( \ + GGANDIVA_FILTER(obj))) + +static void +ggandiva_filter_dispose(GObject *object) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(object); + + if (priv->schema) { + g_object_unref(priv->schema); + priv->schema = nullptr; + } + + if (priv->condition) { + g_object_unref(priv->condition); + priv->condition = nullptr; + } + + G_OBJECT_CLASS(ggandiva_filter_parent_class)->dispose(object); +} + +static void +ggandiva_filter_finalize(GObject *object) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(object); + + priv->filter.~shared_ptr(); + + G_OBJECT_CLASS(ggandiva_filter_parent_class)->finalize(object); +} + +static void +ggandiva_filter_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FILTER: + priv->filter = + *static_cast<std::shared_ptr<gandiva::Filter> *>(g_value_get_pointer(value)); + break; + case PROP_SCHEMA: + priv->schema = GARROW_SCHEMA(g_value_dup_object(value)); + break; + case 
PROP_CONDITION: + priv->condition = GGANDIVA_CONDITION(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_filter_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SCHEMA: + g_value_set_object(value, priv->schema); + break; + case PROP_CONDITION: + g_value_set_object(value, priv->condition); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_filter_init(GGandivaFilter *object) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(object); + new(&priv->filter) std::shared_ptr<gandiva::Filter>; +} + +static void +ggandiva_filter_class_init(GGandivaFilterClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_filter_dispose; + gobject_class->finalize = ggandiva_filter_finalize; + gobject_class->set_property = ggandiva_filter_set_property; + gobject_class->get_property = ggandiva_filter_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("filter", + "Filter", + "The raw std::shared<gandiva::Filter> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FILTER, spec); + + spec = g_param_spec_object("schema", + "Schema", + "The schema for input record batch", + GARROW_TYPE_SCHEMA, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCHEMA, spec); + + spec = g_param_spec_object("condition", + "Condition", + "The condition for the filter", + GGANDIVA_TYPE_CONDITION, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CONDITION, spec); +} + +/** + * ggandiva_filter_new: + * @schema: A #GArrowSchema. 
+ * @condition: The condition to be used. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaFilter on success, + * %NULL on error. + * + * Since: 4.0.0 + */ +GGandivaFilter * +ggandiva_filter_new(GArrowSchema *schema, + GGandivaCondition *condition, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto gandiva_condition = ggandiva_condition_get_raw(condition); + std::shared_ptr<gandiva::Filter> gandiva_filter; + auto status = gandiva::Filter::Make(arrow_schema, + gandiva_condition, + &gandiva_filter); + if (garrow_error_check(error, status, "[gandiva][filter][new]")) { + return ggandiva_filter_new_raw(&gandiva_filter, schema, condition); + } else { + return NULL; + } +} + +/** + * ggandiva_filter_evaluate: + * @filter: A #GGandivaFilter. + * @record_batch: A #GArrowRecordBatch. + * @selection_vector: A #GGandivaSelectionVector that is used as + * output. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE otherwise. 
+ * + * Since: 4.0.0 + */ +gboolean +ggandiva_filter_evaluate(GGandivaFilter *filter, + GArrowRecordBatch *record_batch, + GGandivaSelectionVector *selection_vector, + GError **error) +{ + auto gandiva_filter = ggandiva_filter_get_raw(filter); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto gandiva_selection_vector = + ggandiva_selection_vector_get_raw(selection_vector); + auto status = gandiva_filter->Evaluate(*arrow_record_batch, + gandiva_selection_vector); + return garrow_error_check(error, status, "[gandiva][filter][evaluate]"); +} + +G_END_DECLS + +GGandivaFilter * +ggandiva_filter_new_raw(std::shared_ptr<gandiva::Filter> *gandiva_filter, + GArrowSchema *schema, + GGandivaCondition *condition) +{ + auto filter = g_object_new(GGANDIVA_TYPE_FILTER, + "filter", gandiva_filter, + "schema", schema, + "condition", condition, + NULL); + return GGANDIVA_FILTER(filter); +} + +std::shared_ptr<gandiva::Filter> +ggandiva_filter_get_raw(GGandivaFilter *filter) +{ + auto priv = GGANDIVA_FILTER_GET_PRIVATE(filter); + return priv->filter; +} diff --git a/src/arrow/c_glib/gandiva-glib/filter.h b/src/arrow/c_glib/gandiva-glib/filter.h new file mode 100644 index 000000000..9a0a5dc5d --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/filter.h @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <gandiva-glib/expression.h> +#include <gandiva-glib/selection-vector.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_FILTER (ggandiva_filter_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFilter, + ggandiva_filter, + GGANDIVA, + FILTER, + GObject) + +struct _GGandivaFilterClass +{ + GObjectClass parent_class; +}; + +GGandivaFilter * +ggandiva_filter_new(GArrowSchema *schema, + GGandivaCondition *condition, + GError **error); +gboolean +ggandiva_filter_evaluate(GGandivaFilter *filter, + GArrowRecordBatch *record_batch, + GGandivaSelectionVector *selection_vector, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/filter.hpp b/src/arrow/c_glib/gandiva-glib/filter.hpp new file mode 100644 index 000000000..a0bee9120 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/filter.hpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <memory> + +#include <gandiva/filter.h> + +#include <gandiva-glib/filter.h> + +GGandivaFilter * +ggandiva_filter_new_raw(std::shared_ptr<gandiva::Filter> *gandiva_filter, + GArrowSchema *schema, + GGandivaCondition *condition); +std::shared_ptr<gandiva::Filter> +ggandiva_filter_get_raw(GGandivaFilter *filter); diff --git a/src/arrow/c_glib/gandiva-glib/function-registry.cpp b/src/arrow/c_glib/gandiva-glib/function-registry.cpp new file mode 100644 index 000000000..a95019bd6 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/function-registry.cpp @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <gandiva/function_registry.h> +#include <gandiva-glib/function-registry.h> + +#include <gandiva-glib/function-signature.hpp> +#include <gandiva-glib/native-function.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: function-registry + * @short_description: FunctionRegistry class + * @title: FunctionRegistry class + * @include: gandiva-glib/gandiva-glib.h + * + * Since: 0.14.0 + */ + +G_DEFINE_TYPE(GGandivaFunctionRegistry, + ggandiva_function_registry, + G_TYPE_OBJECT) + +static void +ggandiva_function_registry_init(GGandivaFunctionRegistry *object) +{ /* Nothing to set up: the type has no private data. */ +} + +static void +ggandiva_function_registry_class_init(GGandivaFunctionRegistryClass *klass) +{ /* No vfuncs or properties to install. */ +} + +/** + * ggandiva_function_registry_new: + * + * Returns: A newly created #GGandivaFunctionRegistry. + * + * Since: 0.14.0 + */ +GGandivaFunctionRegistry * +ggandiva_function_registry_new(void) +{ + return GGANDIVA_FUNCTION_REGISTRY(g_object_new(GGANDIVA_TYPE_FUNCTION_REGISTRY, NULL)); +} + +/** + * ggandiva_function_registry_lookup: + * @function_registry: A #GGandivaFunctionRegistry. + * @function_signature: A #GGandivaFunctionSignature to be looked up. + * + * Note that @function_registry itself is not consulted: the lookup is + * performed on a gandiva::FunctionRegistry constructed inside this call. + * + * Returns: (transfer full) (nullable): + * The native function associated with the given #GGandivaFunctionSignature, + * or %NULL if the lookup finds none. + * + * Since: 0.14.0 + */ +GGandivaNativeFunction * +ggandiva_function_registry_lookup(GGandivaFunctionRegistry *function_registry, + GGandivaFunctionSignature *function_signature) +{ + gandiva::FunctionRegistry gandiva_function_registry; + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + auto gandiva_native_function = + gandiva_function_registry.LookupSignature(*gandiva_function_signature); + if (gandiva_native_function) { + return ggandiva_native_function_new_raw(gandiva_native_function); + } else { + return NULL; + } +} + +/** + * ggandiva_function_registry_get_native_functions: + * @function_registry: A #GGandivaFunctionRegistry. 
+ * + * Note that @function_registry itself is not consulted: the list is built + * by iterating a gandiva::FunctionRegistry constructed inside this call. + * + * Returns: (transfer full) (element-type GGandivaNativeFunction): + * The native functions in the function registry. + * + * Since: 0.14.0 + */ +GList * +ggandiva_function_registry_get_native_functions(GGandivaFunctionRegistry *function_registry) +{ + gandiva::FunctionRegistry gandiva_function_registry; + + GList *native_functions = nullptr; + for (auto gandiva_native_function = gandiva_function_registry.begin(); + gandiva_native_function != gandiva_function_registry.end(); + ++gandiva_native_function) { + auto native_function = ggandiva_native_function_new_raw(gandiva_native_function); + native_functions = g_list_prepend(native_functions, native_function); + } + /* Entries were prepended, so reverse once to restore iteration order. */ native_functions = g_list_reverse(native_functions); + + return native_functions; +} + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/function-registry.h b/src/arrow/c_glib/gandiva-glib/function-registry.h new file mode 100644 index 000000000..1a0d767d4 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/function-registry.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <gandiva-glib/native-function.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_FUNCTION_REGISTRY (ggandiva_function_registry_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionRegistry, + ggandiva_function_registry, + GGANDIVA, + FUNCTION_REGISTRY, + GObject) + +struct _GGandivaFunctionRegistryClass +{ + GObjectClass parent_class; +}; + +GGandivaFunctionRegistry *ggandiva_function_registry_new(void); +GGandivaNativeFunction * +ggandiva_function_registry_lookup(GGandivaFunctionRegistry *function_registry, + GGandivaFunctionSignature *function_signature); +GList *ggandiva_function_registry_get_native_functions(GGandivaFunctionRegistry *function_registry); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/function-signature.cpp b/src/arrow/c_glib/gandiva-glib/function-signature.cpp new file mode 100644 index 000000000..c344f3a92 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/function-signature.cpp @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <new> + +#include <arrow-glib/basic-data-type.hpp> + +#include <gandiva-glib/function-signature.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: function-signature + * @short_description: FunctionSignature class + * @title: FunctionSignature class + * @include: gandiva-glib/gandiva-glib.h + * + * Since: 0.14.0 + */ + +/* Per-instance state: the signature is stored by value. It is copy-constructed in place by the "function-signature" property setter and destroyed in finalize. */ +typedef struct GGandivaFunctionSignaturePrivate_ { + gandiva::FunctionSignature function_signature; +} GGandivaFunctionSignaturePrivate; + +enum { + PROP_FUNCTION_SIGNATURE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaFunctionSignature, + ggandiva_function_signature, + G_TYPE_OBJECT) + +#define GGANDIVA_FUNCTION_SIGNATURE_GET_PRIVATE(obj) \ + static_cast<GGandivaFunctionSignaturePrivate *>( \ + ggandiva_function_signature_get_instance_private( \ + GGANDIVA_FUNCTION_SIGNATURE(obj))) + +/* Finalize: explicitly runs the destructor of the in-place constructed signature so the memory owned by its members is released, then chains up. */ +static void +ggandiva_function_signature_finalize(GObject *object) +{ + auto priv = GGANDIVA_FUNCTION_SIGNATURE_GET_PRIVATE(object); + + priv->function_signature.~FunctionSignature(); + + G_OBJECT_CLASS(ggandiva_function_signature_parent_class)->finalize(object); +} + +/* GObject set_property vfunc. "function-signature" is construct-only, so it is set once while the object is being built; the pointed-to signature is copy-constructed into the private storage with placement new rather than assigned, because nothing has constructed that storage beforehand. */ +static void +ggandiva_function_signature_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FUNCTION_SIGNATURE_GET_PRIVATE(object); + switch (prop_id) { + case PROP_FUNCTION_SIGNATURE: + new(&priv->function_signature) gandiva::FunctionSignature( + *static_cast<const gandiva::FunctionSignature *>(g_value_get_pointer(value))); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +/* Instance init: intentionally empty; the signature member is constructed by the property setter. */ +static void +ggandiva_function_signature_init(GGandivaFunctionSignature *object) +{ +} + +/* Class init: wires finalize/set_property and installs the write-only, construct-only "function-signature" property. */ +static void +ggandiva_function_signature_class_init(GGandivaFunctionSignatureClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = ggandiva_function_signature_finalize; + gobject_class->set_property = ggandiva_function_signature_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("function-signature", + "FunctionSignature", + "The raw gandiva::FunctionSignature *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FUNCTION_SIGNATURE, spec); +} + +/** + * ggandiva_function_signature_new: + * @base_name: A base name of a function. 
+ * @parameter_types: (element-type GArrowDataType) + * A list of parameter data types. + * @return_type: A return data type. + * + * Returns: + * A #GGandivaFunctionSignature. + * + * Since: 0.14.0 + */ +GGandivaFunctionSignature * +ggandiva_function_signature_new(const gchar *base_name, + GList *parameter_types, + GArrowDataType *return_type) +{ + gandiva::DataTypeVector arrow_parameter_types; + for (auto node = parameter_types; node; node = g_list_next(node)) { + auto data_type = GARROW_DATA_TYPE(node->data); + auto arrow_data_type = garrow_data_type_get_raw(data_type); + arrow_parameter_types.push_back(arrow_data_type); + } + + auto arrow_return_type = garrow_data_type_get_raw(return_type); + + /* The signature lives on the stack; the new object keeps its own copy. */ + gandiva::FunctionSignature gandiva_function_signature(base_name, + arrow_parameter_types, + arrow_return_type); + return ggandiva_function_signature_new_raw(&gandiva_function_signature); +} + +/** + * ggandiva_function_signature_equal: + * @function_signature: A #GGandivaFunctionSignature. + * @other_function_signature: A #GGandivaFunctionSignature to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE otherwise. + * + * Since: 0.14.0 + */ +gboolean +ggandiva_function_signature_equal(GGandivaFunctionSignature *function_signature, + GGandivaFunctionSignature *other_function_signature) +{ + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + auto gandiva_other_function_signature = + ggandiva_function_signature_get_raw(other_function_signature); + + return (*gandiva_function_signature) == (*gandiva_other_function_signature); +} + +/** + * ggandiva_function_signature_to_string: + * @function_signature: A #GGandivaFunctionSignature + * + * Returns: (transfer full): The string representation of the function signature. + * + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 0.14.0 + */ +gchar * +ggandiva_function_signature_to_string(GGandivaFunctionSignature *function_signature) +{ + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + return g_strdup(gandiva_function_signature->ToString().c_str()); +} + +/** + * ggandiva_function_signature_get_return_type: + * @function_signature: A #GGandivaFunctionSignature + * + * Returns: (transfer full): + * A #GArrowDataType of the return value of the function signature. + * + * Since: 0.14.0 + */ +GArrowDataType * +ggandiva_function_signature_get_return_type(GGandivaFunctionSignature *function_signature) +{ + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + auto arrow_data_type = gandiva_function_signature->ret_type(); + auto data_type = garrow_data_type_new_raw(&arrow_data_type); + return data_type; +} + +/** + * ggandiva_function_signature_get_base_name: + * @function_signature: A #GGandivaFunctionSignature + * + * Returns: (transfer full): A base name of the function signature. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.14.0 + */ +gchar * +ggandiva_function_signature_get_base_name(GGandivaFunctionSignature *function_signature) +{ + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + return g_strdup(gandiva_function_signature->base_name().c_str()); +} + +/** + * ggandiva_function_signature_get_param_types: + * @function_signature: A #GGandivaFunctionSignature + * + * Returns: (transfer full) (element-type GArrowDataType): + * A list of parameter data types of the function signature. 
+ * + * Since: 0.14.0 + */ +GList * +ggandiva_function_signature_get_param_types(GGandivaFunctionSignature *function_signature) +{ + auto gandiva_function_signature = + ggandiva_function_signature_get_raw(function_signature); + + GList *param_type_list = nullptr; + auto arrow_param_types = gandiva_function_signature->param_types(); + for (auto &arrow_param_type : arrow_param_types) { + auto data_type = garrow_data_type_new_raw(&arrow_param_type); + param_type_list = g_list_prepend(param_type_list, data_type); + } + /* Entries were prepended, so reverse once to restore parameter order. */ + param_type_list = g_list_reverse(param_type_list); + + return param_type_list; +} + +G_END_DECLS + +/* Creates a GGandivaFunctionSignature holding its own copy of the pointed-to signature; the caller keeps ownership of @gandiva_function_signature. */ +GGandivaFunctionSignature * +ggandiva_function_signature_new_raw(const gandiva::FunctionSignature *gandiva_function_signature) +{ + auto function_signature = + GGANDIVA_FUNCTION_SIGNATURE(g_object_new(GGANDIVA_TYPE_FUNCTION_SIGNATURE, + "function-signature", + gandiva_function_signature, + NULL)); + return function_signature; +} + +/* Returns a pointer to the signature stored inside the object's private data; the pointer is only meaningful while the object is alive. */ +const gandiva::FunctionSignature * +ggandiva_function_signature_get_raw(GGandivaFunctionSignature *function_signature) +{ + auto priv = GGANDIVA_FUNCTION_SIGNATURE_GET_PRIVATE(function_signature); + return &priv->function_signature; +} diff --git a/src/arrow/c_glib/gandiva-glib/function-signature.h b/src/arrow/c_glib/gandiva-glib/function-signature.h new file mode 100644 index 000000000..b1099ff99 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/function-signature.h @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_FUNCTION_SIGNATURE (ggandiva_function_signature_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionSignature, + ggandiva_function_signature, + GGANDIVA, + FUNCTION_SIGNATURE, + GObject) + +struct _GGandivaFunctionSignatureClass +{ + GObjectClass parent_class; +}; + +GGandivaFunctionSignature *ggandiva_function_signature_new(const gchar *base_name, + GList *parameter_types, + GArrowDataType *return_type); +gboolean ggandiva_function_signature_equal(GGandivaFunctionSignature *function_signature, + GGandivaFunctionSignature *other_function_signature); +gchar *ggandiva_function_signature_to_string(GGandivaFunctionSignature *function_signature); +GArrowDataType *ggandiva_function_signature_get_return_type(GGandivaFunctionSignature *function_signature); +gchar *ggandiva_function_signature_get_base_name(GGandivaFunctionSignature *function_signature); +GList *ggandiva_function_signature_get_param_types(GGandivaFunctionSignature *function_signature); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/function-signature.hpp b/src/arrow/c_glib/gandiva-glib/function-signature.hpp new file mode 100644 index 000000000..24f71e6bc --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/function-signature.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <gandiva/function_signature.h> + +#include <gandiva-glib/function-signature.h> + +GGandivaFunctionSignature *ggandiva_function_signature_new_raw(const gandiva::FunctionSignature *gandiva_function_signature); +const gandiva::FunctionSignature *ggandiva_function_signature_get_raw(GGandivaFunctionSignature *signature); diff --git a/src/arrow/c_glib/gandiva-glib/gandiva-glib.h b/src/arrow/c_glib/gandiva-glib/gandiva-glib.h new file mode 100644 index 000000000..9c1a1604d --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/gandiva-glib.h @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <gandiva-glib/version.h> + +#include <gandiva-glib/expression.h> +#include <gandiva-glib/filter.h> +#include <gandiva-glib/function-registry.h> +#include <gandiva-glib/function-signature.h> +#include <gandiva-glib/native-function.h> +#include <gandiva-glib/node.h> +#include <gandiva-glib/projector.h> +#include <gandiva-glib/selection-vector.h> diff --git a/src/arrow/c_glib/gandiva-glib/gandiva-glib.hpp b/src/arrow/c_glib/gandiva-glib/gandiva-glib.hpp new file mode 100644 index 000000000..eb39f5838 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/gandiva-glib.hpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <gandiva-glib/gandiva-glib.h> + +#include <gandiva-glib/expression.hpp> +#include <gandiva-glib/filter.hpp> +#include <gandiva-glib/node.hpp> +#include <gandiva-glib/projector.hpp> +#include <gandiva-glib/selection-vector.hpp> diff --git a/src/arrow/c_glib/gandiva-glib/meson.build b/src/arrow/c_glib/gandiva-glib/meson.build new file mode 100644 index 000000000..5127d67af --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/meson.build @@ -0,0 +1,120 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +project_name = 'gandiva-glib' + +sources = files( + 'expression.cpp', + 'filter.cpp', + 'function-registry.cpp', + 'function-signature.cpp', + 'native-function.cpp', + 'node.cpp', + 'projector.cpp', + 'selection-vector.cpp', +) + +c_headers = files( + 'expression.h', + 'filter.h', + 'function-registry.h', + 'function-signature.h', + 'gandiva-glib.h', + 'native-function.h', + 'node.h', + 'projector.h', + 'selection-vector.h', +) + +cpp_headers = files( + 'expression.hpp', + 'filter.hpp', + 'function-signature.hpp', + 'gandiva-glib.hpp', + 'native-function.hpp', + 'node.hpp', + 'projector.hpp', + 'selection-vector.hpp', +) + +version_h_conf = configuration_data() +version_h_conf.set('GGANDIVA_VERSION_MAJOR', version_major) +version_h_conf.set('GGANDIVA_VERSION_MINOR', version_minor) +version_h_conf.set('GGANDIVA_VERSION_MICRO', version_micro) +version_h_conf.set('GGANDIVA_VERSION_TAG', version_tag) +version_h = configure_file(input: 'version.h.in', + output: 'version.h', + configuration: version_h_conf) +c_headers += version_h + +enums = gnome.mkenums('enums', + sources: c_headers, + identifier_prefix: 'GGandiva', + symbol_prefix: 'ggandiva', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, meson.project_name()), + install_header: true) +enums_source = enums[0] +enums_header = enums[1] + +headers = c_headers + cpp_headers +install_headers(headers, subdir: project_name) + +dependencies = [ + gandiva, + arrow_glib, +] +libgandiva_glib = library('gandiva-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +gandiva_glib = declare_dependency(link_with: libgandiva_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header) + +pkgconfig.generate(libgandiva_glib, + filebase: project_name, + name: 'Apache Arrow Gandiva GLib', + 
description: 'C API for Apache Arrow Gandiva based on GLib', + version: version, + requires: ['gandiva', 'arrow-glib']) + +if have_gi + gnome.generate_gir(libgandiva_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + sources: sources + c_headers + enums, + namespace: 'Gandiva', + nsversion: api_version, + identifier_prefix: 'GGandiva', + symbol_prefix: 'ggandiva', + export_packages: 'gandiva-glib', + includes: [ + 'Arrow-1.0' + ], + install: true, + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ]) +endif diff --git a/src/arrow/c_glib/gandiva-glib/native-function.cpp b/src/arrow/c_glib/gandiva-glib/native-function.cpp new file mode 100644 index 000000000..0755ad1d6 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/native-function.cpp @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <gandiva-glib/native-function.hpp> + +#include <gandiva-glib/function-signature.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: native-function + * @short_description: NativeFunction class + * @title: NativeFunction class + * @include: gandiva-glib/gandiva-glib.h + * + * Since: 0.14.0 + */ + +typedef struct GGandivaNativeFunctionPrivate_ { + const gandiva::NativeFunction *native_function; +} GGandivaNativeFunctionPrivate; + +enum { + PROP_NATIVE_FUNCTION = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaNativeFunction, + ggandiva_native_function, + G_TYPE_OBJECT) + +#define GGANDIVA_NATIVE_FUNCTION_GET_PRIVATE(obj) \ + static_cast<GGandivaNativeFunctionPrivate *>( \ + ggandiva_native_function_get_instance_private( \ + GGANDIVA_NATIVE_FUNCTION(obj))) + +static void +ggandiva_native_function_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_NATIVE_FUNCTION_GET_PRIVATE(object); + switch (prop_id) { + case PROP_NATIVE_FUNCTION: + priv->native_function = + static_cast<const gandiva::NativeFunction *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_native_function_init(GGandivaNativeFunction *object) +{ +} + +static void +ggandiva_native_function_class_init(GGandivaNativeFunctionClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = ggandiva_native_function_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("native-function", + "NativeFunction", + "The raw gandiva::NativeFunction *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_NATIVE_FUNCTION, spec); +} + +/** + * ggandiva_native_function_get_signatures: + * @native_function: A #GGandivaNativeFunction. 
+ * + * Returns: (element-type GGandivaFunctionSignature) (transfer full): + * A list of #GGandivaFunctionSignature supported by the native function. + * + * Since: 0.15.0 + */ +GList * +ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + GList *signatures = nullptr; + for (auto &gandiva_signature : gandiva_native_function->signatures()) { + auto signature = ggandiva_function_signature_new_raw(&gandiva_signature); + signatures = g_list_prepend(signatures, signature); + } + return g_list_reverse(signatures); +} + +/** + * ggandiva_native_function_equal: + * @native_function: A #GGandivaNativeFunction. + * @other_native_function: A #GGandivaNativeFunction to be compared. + * + * Returns: %TRUE if both of them have the same data, %FALSE otherwise. + * + * Since: 0.14.0 + */ +gboolean +ggandiva_native_function_equal(GGandivaNativeFunction *native_function, + GGandivaNativeFunction *other_native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + auto gandiva_other_native_function = + ggandiva_native_function_get_raw(other_native_function); + return gandiva_native_function == gandiva_other_native_function; +} + +/** + * ggandiva_native_function_to_string: + * @native_function: A #GGandivaNativeFunction. + * + * Returns: (transfer full): + * The string representation of the signatures of the native function. + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 0.14.0 + */ +gchar * +ggandiva_native_function_to_string(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + auto string = g_string_new(NULL); + for (auto &gandiva_signature : gandiva_native_function->signatures()) { + if (string->len > 0) { + g_string_append(string, ", "); + } + const auto &signature_string = gandiva_signature.ToString(); + g_string_append_len(string, + signature_string.data(), + signature_string.length()); + } + return g_string_free(string, FALSE); +} + +/** + * ggandiva_native_function_get_result_nullable_type: + * @native_function: A #GGandivaNativeFunction. + * + * Returns: + * A value of #GGandivaResultNullableType. + * + * Since: 0.14.0 + */ +GGandivaResultNullableType +ggandiva_native_function_get_result_nullable_type(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + const auto gandiva_result_nullable_type = + gandiva_native_function->result_nullable_type(); + return ggandiva_result_nullable_type_from_raw(gandiva_result_nullable_type); +} + +/** + * ggandiva_native_function_need_context: + * @native_function: A #GGandivaNativeFunction. + * + * Returns: + * %TRUE if the native function needs a context for evaluation, + * %FALSE otherwise. + * + * Since: 0.14.0 + */ +gboolean +ggandiva_native_function_need_context(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + return gandiva_native_function->NeedsContext(); +} + +/** + * ggandiva_native_function_need_function_holder: + * @native_function: A #GGandivaNativeFunction. + * + * Returns: + * %TRUE if the native function needs a function holder for evaluation, + * %FALSE otherwise. 
+ * + * Since: 0.14.0 + */ +gboolean +ggandiva_native_function_need_function_holder(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + return gandiva_native_function->NeedsFunctionHolder(); +} + +/** + * ggandiva_native_function_can_return_errors: + * @native_function: A #GGandivaNativeFunction. + * + * Returns: + * %TRUE if the native function has the possibility of returning errors, + * %FALSE otherwise. + * + * Since: 0.14.0 + */ +gboolean +ggandiva_native_function_can_return_errors(GGandivaNativeFunction *native_function) +{ + auto gandiva_native_function = + ggandiva_native_function_get_raw(native_function); + return gandiva_native_function->CanReturnErrors(); +} + +G_END_DECLS + +GGandivaResultNullableType +ggandiva_result_nullable_type_from_raw(gandiva::ResultNullableType gandiva_type) +{ + switch (gandiva_type) { + case gandiva::kResultNullIfNull: + return GGANDIVA_RESULT_NULL_IF_NULL; + case gandiva::kResultNullNever: + return GGANDIVA_RESULT_NULL_NEVER; + case gandiva::kResultNullInternal: + return GGANDIVA_RESULT_NULL_INTERNAL; + default: + return GGANDIVA_RESULT_NULL_IF_NULL; + } +} + +gandiva::ResultNullableType +ggandiva_result_nullable_type_to_raw(GGandivaResultNullableType type) +{ + switch (type) { + case GGANDIVA_RESULT_NULL_IF_NULL: + return gandiva::kResultNullIfNull; + case GGANDIVA_RESULT_NULL_NEVER: + return gandiva::kResultNullNever; + case GGANDIVA_RESULT_NULL_INTERNAL: + return gandiva::kResultNullInternal; + default: + return gandiva::kResultNullIfNull; + } +} + +GGandivaNativeFunction * +ggandiva_native_function_new_raw(const gandiva::NativeFunction *gandiva_native_function) +{ + auto native_function = + GGANDIVA_NATIVE_FUNCTION(g_object_new(GGANDIVA_TYPE_NATIVE_FUNCTION, + "native-function", + gandiva_native_function, + NULL)); + return native_function; +} + +const gandiva::NativeFunction * +ggandiva_native_function_get_raw(GGandivaNativeFunction 
*native_function) +{ + auto priv = GGANDIVA_NATIVE_FUNCTION_GET_PRIVATE(native_function); + return priv->native_function; +} diff --git a/src/arrow/c_glib/gandiva-glib/native-function.h b/src/arrow/c_glib/gandiva-glib/native-function.h new file mode 100644 index 000000000..8b4d6a44c --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/native-function.h @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <gandiva-glib/function-signature.h> + +G_BEGIN_DECLS + +/** + * GGandivaResultNullableType: + * @GGANDIVA_RESULT_NULL_IF_NULL: This means the result validity is an intersection of + * the validity of the children. + * @GGANDIVA_RESULT_NULL_NEVER: This means that the result is always valid. + * @GGANDIVA_RESULT_NULL_INTERNAL: This means that the result validity depends on some + * internal logic. + * + * They correspond to `gandiva::ResultNullableType` values.
+ */ +typedef enum { + GGANDIVA_RESULT_NULL_IF_NULL, + GGANDIVA_RESULT_NULL_NEVER, + GGANDIVA_RESULT_NULL_INTERNAL +} GGandivaResultNullableType; + +#define GGANDIVA_TYPE_NATIVE_FUNCTION (ggandiva_native_function_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaNativeFunction, + ggandiva_native_function, + GGANDIVA, + NATIVE_FUNCTION, + GObject) + +struct _GGandivaNativeFunctionClass +{ + GObjectClass parent_class; +}; + +GList * +ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function); +gboolean +ggandiva_native_function_equal(GGandivaNativeFunction *native_function, + GGandivaNativeFunction *other_native_function); +gchar *ggandiva_native_function_to_string(GGandivaNativeFunction *native_function); +GGandivaResultNullableType ggandiva_native_function_get_result_nullable_type(GGandivaNativeFunction *native_function); +gboolean ggandiva_native_function_need_context(GGandivaNativeFunction *native_function); +gboolean ggandiva_native_function_need_function_holder(GGandivaNativeFunction *native_function); +gboolean ggandiva_native_function_can_return_errors(GGandivaNativeFunction *native_function); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/native-function.hpp b/src/arrow/c_glib/gandiva-glib/native-function.hpp new file mode 100644 index 000000000..76119ca40 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/native-function.hpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <gandiva/native_function.h> + +#include <gandiva-glib/native-function.h> + +G_BEGIN_DECLS + +GGandivaResultNullableType +ggandiva_result_nullable_type_from_raw(gandiva::ResultNullableType gandiva_type); +gandiva::ResultNullableType +ggandiva_result_nullable_type_to_raw(GGandivaResultNullableType type); + +GGandivaNativeFunction *ggandiva_native_function_new_raw(const gandiva::NativeFunction *gandiva_native_function); +const gandiva::NativeFunction *ggandiva_native_function_get_raw(GGandivaNativeFunction *native_function); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/node.cpp b/src/arrow/c_glib/gandiva-glib/node.cpp new file mode 100644 index 000000000..d42d4801b --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/node.cpp @@ -0,0 +1,1688 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/data-type.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/field.hpp> + +#include <gandiva-glib/node.hpp> + +/* Return, by const reference, the Type value read from holder() of the gandiva::LiteralNode wrapped by node. */ +template <typename Type> +const Type & +ggandiva_literal_node_get(GGandivaLiteralNode *node) +{ + auto gandiva_literal_node = + std::static_pointer_cast<gandiva::LiteralNode>(ggandiva_node_get_raw(GGANDIVA_NODE(node))); + return arrow::util::get<Type>(gandiva_literal_node->holder()); +} + +G_BEGIN_DECLS + +/** + * SECTION: node + * @section_id: node-classes + * @title: Node classes + * @include: gandiva-glib/gandiva-glib.h + * + * #GGandivaNode is a base class for a node in the expression tree. + * + * #GGandivaFieldNode is a class for a node in the expression tree, representing an Arrow field. + * + * #GGandivaFunctionNode is a class for a node in the expression tree, representing a function. + * + * #GGandivaLiteralNode is a base class for a node in the expression tree, + * representing a literal. + * + * #GGandivaNullLiteralNode is a class for a node in the expression tree, + * representing a null literal. + * + * #GGandivaBooleanLiteralNode is a class for a node in the expression tree, + * representing a boolean literal. + * + * #GGandivaInt8LiteralNode is a class for a node in the expression tree, + * representing an 8-bit integer literal. + * + * #GGandivaUInt8LiteralNode is a class for a node in the expression tree, + * representing an 8-bit unsigned integer literal. + * + * #GGandivaInt16LiteralNode is a class for a node in the expression tree, + * representing a 16-bit integer literal. + * + * #GGandivaUInt16LiteralNode is a class for a node in the expression tree, + * representing a 16-bit unsigned integer literal. + * + * #GGandivaInt32LiteralNode is a class for a node in the expression tree, + * representing a 32-bit integer literal.
+ * + * #GGandivaUInt32LiteralNode is a class for a node in the expression tree, + * representing a 32-bit unsigned integer literal. + * + * #GGandivaInt64LiteralNode is a class for a node in the expression tree, + * representing a 64-bit integer literal. + * + * #GGandivaUInt64LiteralNode is a class for a node in the expression tree, + * representing a 64-bit unsigned integer literal. + * + * #GGandivaFloatLiteralNode is a class for a node in the expression tree, + * representing a 32-bit floating point literal. + * + * #GGandivaDoubleLiteralNode is a class for a node in the expression tree, + * representing a 64-bit floating point literal. + * + * #GGandivaBinaryLiteralNode is a class for a node in the expression tree, + * representing a binary literal. + * + * #GGandivaStringLiteralNode is a class for a node in the expression tree, + * representing a UTF-8 encoded string literal. + * + * #GGandivaIfNode is a class for a node in the expression tree, representing an if-else. + * + * #GGandivaBooleanNode is a class for a node in the expression tree, representing a boolean. + * + * #GGandivaAndNode is a class for a node in the expression tree, representing an AND. + * + * #GGandivaOrNode is a class for a node in the expression tree, representing an OR.
+ * + * Since: 0.12.0 + */ + +typedef struct GGandivaNodePrivate_ { + std::shared_ptr<gandiva::Node> node; + GArrowDataType *return_type; +} GGandivaNodePrivate; + +enum { + PROP_NODE = 1, + PROP_RETURN_TYPE +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GGandivaNode, + ggandiva_node, + G_TYPE_OBJECT) + +#define GGANDIVA_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaNodePrivate *>( \ + ggandiva_node_get_instance_private( \ + GGANDIVA_NODE(object))) + +static void +ggandiva_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + if (priv->return_type) { + g_object_unref(priv->return_type); + priv->return_type = nullptr; + } + + G_OBJECT_CLASS(ggandiva_node_parent_class)->dispose(object); +} + +static void +ggandiva_node_finalize(GObject *object) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + priv->node.~shared_ptr(); + + G_OBJECT_CLASS(ggandiva_node_parent_class)->finalize(object); +} + +static void +ggandiva_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_NODE: + priv->node = + *static_cast<std::shared_ptr<gandiva::Node> *>(g_value_get_pointer(value)); + break; + case PROP_RETURN_TYPE: + priv->return_type = GARROW_DATA_TYPE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RETURN_TYPE: + g_value_set_object(value, priv->return_type); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_node_init(GGandivaNode *object) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(object); + new(&priv->node) std::shared_ptr<gandiva::Node>; +} + +static void 
+ggandiva_node_class_init(GGandivaNodeClass *klass) +{ + /* Hook up the GObject virtual functions, then install the two construct-only properties. */ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_node_dispose; + gobject_class->finalize = ggandiva_node_finalize; + gobject_class->set_property = ggandiva_node_set_property; + gobject_class->get_property = ggandiva_node_get_property; + + GParamSpec *spec; + /* "node": write-only pointer to the raw std::shared_ptr<gandiva::Node>. */ + spec = g_param_spec_pointer("node", + "Node", + "The raw std::shared_ptr<gandiva::Node> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_NODE, spec); + + /* "return-type": readable GArrowDataType object, settable only at construction. */ + spec = g_param_spec_object("return-type", + "Return type", + "The return type", + GARROW_TYPE_DATA_TYPE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RETURN_TYPE, spec); +} + +/** + * ggandiva_node_to_string: + * @node: A #GGandivaNode. + * + * Returns: (transfer full): The string representation of the node. + * + * It should be freed with g_free() when no longer needed.
+ * + * Since: 1.0.0 + */ +gchar * +ggandiva_node_to_string(GGandivaNode *node) +{ + auto gandiva_node = ggandiva_node_get_raw(node); + auto string = gandiva_node->ToString(); + return g_strndup(string.data(), string.size()); +} + +typedef struct GGandivaFieldNodePrivate_ { + GArrowField *field; +} GGandivaFieldNodePrivate; + +enum { + PROP_FIELD = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaFieldNode, + ggandiva_field_node, + GGANDIVA_TYPE_NODE) + +#define GGANDIVA_FIELD_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaFieldNodePrivate *>( \ + ggandiva_field_node_get_instance_private( \ + GGANDIVA_FIELD_NODE(object))) + +static void +ggandiva_field_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_FIELD_NODE_GET_PRIVATE(object); + + if (priv->field) { + g_object_unref(priv->field); + priv->field = nullptr; + } + + G_OBJECT_CLASS(ggandiva_field_node_parent_class)->dispose(object); +} + +static void +ggandiva_field_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FIELD_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FIELD: + priv->field = GARROW_FIELD(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_field_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FIELD_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FIELD: + g_value_set_object(value, priv->field); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_field_node_init(GGandivaFieldNode *field_node) +{ +} + +static void +ggandiva_field_node_class_init(GGandivaFieldNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_field_node_dispose; + gobject_class->set_property = ggandiva_field_node_set_property; + 
gobject_class->get_property = ggandiva_field_node_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("field", + "Field", + "The field", + GARROW_TYPE_FIELD, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FIELD, spec); +} + +/** + * ggandiva_field_node_new: + * @field: A #GArrowField. + * + * Returns: A newly created #GGandivaFieldNode for the given field. + * + * Since: 0.12.0 + */ +GGandivaFieldNode * +ggandiva_field_node_new(GArrowField *field) +{ + auto arrow_field = garrow_field_get_raw(field); + auto gandiva_node = gandiva::TreeExprBuilder::MakeField(arrow_field); + return ggandiva_field_node_new_raw(&gandiva_node, field); +} + + +typedef struct GGandivaFunctionNodePrivate_ { + gchar *name; + GList *parameters; +} GGandivaFunctionNodePrivate; + +enum { + PROP_NAME = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaFunctionNode, + ggandiva_function_node, + GGANDIVA_TYPE_NODE) + +#define GGANDIVA_FUNCTION_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaFunctionNodePrivate *>( \ + ggandiva_function_node_get_instance_private( \ + GGANDIVA_FUNCTION_NODE(object))) \ + +static void +ggandiva_function_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(object); + + if (priv->parameters) { + for (auto node = priv->parameters; node; node = g_list_next(node)) { + auto parameter = GGANDIVA_NODE(node->data); + g_object_unref(parameter); + } + g_list_free(priv->parameters); + priv->parameters = nullptr; + } + + G_OBJECT_CLASS(ggandiva_function_node_parent_class)->dispose(object); +} + +static void +ggandiva_function_node_finalize(GObject *object) +{ + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(object); + + g_free(priv->name); + + G_OBJECT_CLASS(ggandiva_function_node_parent_class)->finalize(object); +} + +static void +ggandiva_function_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GGANDIVA_FUNCTION_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_NAME: + priv->name = g_value_dup_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_function_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_NAME: + g_value_set_string(value, priv->name); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_function_node_init(GGandivaFunctionNode *function_node) +{ + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(function_node); + priv->parameters = nullptr; +} + +static void +ggandiva_function_node_class_init(GGandivaFunctionNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_function_node_dispose; + gobject_class->finalize = ggandiva_function_node_finalize; + gobject_class->set_property = ggandiva_function_node_set_property; + gobject_class->get_property = ggandiva_function_node_get_property; + + GParamSpec *spec; + spec = g_param_spec_string("name", + "Name", + "The name of the function", + nullptr, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_NAME, spec); +} + +/** + * ggandiva_function_node_new: + * @name: The name of the function to be called. + * @parameters: (element-type GGandivaNode): The parameters of the function call. + * @return_type: The return type of the function call. + * + * Returns: A newly created #GGandivaFunctionNode for the function call. 
+ * + * Since: 0.12.0 + */ +GGandivaFunctionNode * +ggandiva_function_node_new(const gchar *name, + GList *parameters, + GArrowDataType *return_type) +{ + std::vector<std::shared_ptr<gandiva::Node>> gandiva_nodes; + for (auto node = parameters; node; node = g_list_next(node)) { + auto gandiva_node = ggandiva_node_get_raw(GGANDIVA_NODE(node->data)); + gandiva_nodes.push_back(gandiva_node); + } + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto gandiva_node = gandiva::TreeExprBuilder::MakeFunction(name, + gandiva_nodes, + arrow_return_type); + return ggandiva_function_node_new_raw(&gandiva_node, + name, + parameters, + return_type); +} + +/** + * ggandiva_function_node_get_parameters: + * @node: A #GGandivaFunctionNode. + * + * Returns: (transfer none) (element-type GGandivaNode): + * The parameters of the function node. + * + * Since: 0.12.0 + */ +GList * +ggandiva_function_node_get_parameters(GGandivaFunctionNode *node) +{ + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(node); + return priv->parameters; +} + + +G_DEFINE_TYPE(GGandivaLiteralNode, + ggandiva_literal_node, + GGANDIVA_TYPE_NODE) + +static void +ggandiva_literal_node_init(GGandivaLiteralNode *literal_node) +{ +} + +static void +ggandiva_literal_node_class_init(GGandivaLiteralNodeClass *klass) +{ +} + + +G_DEFINE_TYPE(GGandivaNullLiteralNode, + ggandiva_null_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_null_literal_node_init(GGandivaNullLiteralNode *null_literal_node) +{ +} + +static void +ggandiva_null_literal_node_class_init(GGandivaNullLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_null_literal_node_new: + * @return_type: A #GArrowDataType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaNullLiteralNode for + * the type or %NULL on error. 
+ * + * Since: 0.12.0 + */ +GGandivaNullLiteralNode * +ggandiva_null_literal_node_new(GArrowDataType *return_type, + GError **error) +{ + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto gandiva_node = gandiva::TreeExprBuilder::MakeNull(arrow_return_type); + if (!gandiva_node) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][null-literal-node][new] " + "failed to create: <%s>", + arrow_return_type->ToString().c_str()); + return NULL; + } + return GGANDIVA_NULL_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + return_type)); +} + + +G_DEFINE_TYPE(GGandivaBooleanLiteralNode, + ggandiva_boolean_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_boolean_literal_node_init(GGandivaBooleanLiteralNode *boolean_literal_node) +{ +} + +static void +ggandiva_boolean_literal_node_class_init(GGandivaBooleanLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_boolean_literal_node_new: + * @value: The value of the boolean literal. + * + * Returns: A newly created #GGandivaBooleanLiteralNode. + * + * Since: 0.12.0 + */ +GGandivaBooleanLiteralNode * +ggandiva_boolean_literal_node_new(gboolean value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(static_cast<bool>(value)); + return GGANDIVA_BOOLEAN_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_boolean_literal_node_get_value: + * @node: A #GGandivaBooleanLiteralNode. + * + * Returns: The value of the boolean literal. 
+ * + * Since: 0.12.0 + */ +gboolean +ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node) +{ + auto value = ggandiva_literal_node_get<bool>(GGANDIVA_LITERAL_NODE(node)); + return static_cast<gboolean>(value); +} + + +G_DEFINE_TYPE(GGandivaInt8LiteralNode, + ggandiva_int8_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int8_literal_node_init(GGandivaInt8LiteralNode *int8_literal_node) +{ +} + +static void +ggandiva_int8_literal_node_class_init(GGandivaInt8LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int8_literal_node_new: + * @value: The value of the 8-bit integer literal. + * + * Returns: A newly created #GGandivaInt8LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaInt8LiteralNode * +ggandiva_int8_literal_node_new(gint8 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT8_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int8_literal_node_get_value: + * @node: A #GGandivaInt8LiteralNode. + * + * Returns: The value of the 8-bit integer literal. + * + * Since: 0.12.0 + */ +gint8 +ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node) +{ + return ggandiva_literal_node_get<int8_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt8LiteralNode, + ggandiva_uint8_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint8_literal_node_init(GGandivaUInt8LiteralNode *uint8_literal_node) +{ +} + +static void +ggandiva_uint8_literal_node_class_init(GGandivaUInt8LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint8_literal_node_new: + * @value: The value of the 8-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt8LiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaUInt8LiteralNode * +ggandiva_uint8_literal_node_new(guint8 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT8_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint8_literal_node_get_value: + * @node: A #GGandivaUInt8LiteralNode. + * + * Returns: The value of the 8-bit unsigned integer literal. + * + * Since: 0.12.0 + */ +guint8 +ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node) +{ + return ggandiva_literal_node_get<uint8_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaInt16LiteralNode, + ggandiva_int16_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int16_literal_node_init(GGandivaInt16LiteralNode *int16_literal_node) +{ +} + +static void +ggandiva_int16_literal_node_class_init(GGandivaInt16LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int16_literal_node_new: + * @value: The value of the 16-bit integer literal. + * + * Returns: A newly created #GGandivaInt16LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaInt16LiteralNode * +ggandiva_int16_literal_node_new(gint16 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT16_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int16_literal_node_get_value: + * @node: A #GGandivaInt16LiteralNode. + * + * Returns: The value of the 16-bit integer literal. 
+ * + * Since: 0.12.0 + */ +gint16 +ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node) +{ + return ggandiva_literal_node_get<int16_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt16LiteralNode, + ggandiva_uint16_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint16_literal_node_init(GGandivaUInt16LiteralNode *uint16_literal_node) +{ +} + +static void +ggandiva_uint16_literal_node_class_init(GGandivaUInt16LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint16_literal_node_new: + * @value: The value of the 16-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt16LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaUInt16LiteralNode * +ggandiva_uint16_literal_node_new(guint16 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT16_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint16_literal_node_get_value: + * @node: A #GGandivaUInt16LiteralNode. + * + * Returns: The value of the 16-bit unsigned integer literal. + * + * Since: 0.12.0 + */ +guint16 +ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node) +{ + return ggandiva_literal_node_get<uint16_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaInt32LiteralNode, + ggandiva_int32_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int32_literal_node_init(GGandivaInt32LiteralNode *int32_literal_node) +{ +} + +static void +ggandiva_int32_literal_node_class_init(GGandivaInt32LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int32_literal_node_new: + * @value: The value of the 32-bit integer literal. + * + * Returns: A newly created #GGandivaInt32LiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaInt32LiteralNode * +ggandiva_int32_literal_node_new(gint32 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT32_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int32_literal_node_get_value: + * @node: A #GGandivaInt32LiteralNode. + * + * Returns: The value of the 32-bit integer literal. + * + * Since: 0.12.0 + */ +gint32 +ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node) +{ + return ggandiva_literal_node_get<int32_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt32LiteralNode, + ggandiva_uint32_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint32_literal_node_init(GGandivaUInt32LiteralNode *uint32_literal_node) +{ +} + +static void +ggandiva_uint32_literal_node_class_init(GGandivaUInt32LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint32_literal_node_new: + * @value: The value of the 32-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt32LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaUInt32LiteralNode * +ggandiva_uint32_literal_node_new(guint32 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT32_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint32_literal_node_get_value: + * @node: A #GGandivaUInt32LiteralNode. + * + * Returns: The value of the 32-bit unsigned integer literal. 
+ * + * Since: 0.12.0 + */ +guint32 +ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node) +{ + return ggandiva_literal_node_get<uint32_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaInt64LiteralNode, + ggandiva_int64_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_int64_literal_node_init(GGandivaInt64LiteralNode *int64_literal_node) +{ +} + +static void +ggandiva_int64_literal_node_class_init(GGandivaInt64LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_int64_literal_node_new: + * @value: The value of the 64-bit integer literal. + * + * Returns: A newly created #GGandivaInt64LiteralNode. + * + * Since: 0.12.0 + */ +GGandivaInt64LiteralNode * +ggandiva_int64_literal_node_new(gint64 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_INT64_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_int64_literal_node_get_value: + * @node: A #GGandivaInt64LiteralNode. + * + * Returns: The value of the 64-bit integer literal. + * + * Since: 0.12.0 + */ +gint64 +ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node) +{ + return ggandiva_literal_node_get<int64_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaUInt64LiteralNode, + ggandiva_uint64_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_uint64_literal_node_init(GGandivaUInt64LiteralNode *uint64_literal_node) +{ +} + +static void +ggandiva_uint64_literal_node_class_init(GGandivaUInt64LiteralNodeClass *klass) +{ +} + +/** + * ggandiva_uint64_literal_node_new: + * @value: The value of the 64-bit unsigned integer literal. + * + * Returns: A newly created #GGandivaUInt64LiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaUInt64LiteralNode * +ggandiva_uint64_literal_node_new(guint64 value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_UINT64_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_uint64_literal_node_get_value: + * @node: A #GGandivaUInt64LiteralNode. + * + * Returns: The value of the 64-bit unsigned integer literal. + * + * Since: 0.12.0 + */ +guint64 +ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node) +{ + return ggandiva_literal_node_get<uint64_t>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaFloatLiteralNode, + ggandiva_float_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_float_literal_node_init(GGandivaFloatLiteralNode *float_literal_node) +{ +} + +static void +ggandiva_float_literal_node_class_init(GGandivaFloatLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_float_literal_node_new: + * @value: The value of the 32-bit floating point literal. + * + * Returns: A newly created #GGandivaFloatLiteralNode. + * + * Since: 0.12.0 + */ +GGandivaFloatLiteralNode * +ggandiva_float_literal_node_new(gfloat value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_FLOAT_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_float_literal_node_get_value: + * @node: A #GGandivaFloatLiteralNode. + * + * Returns: The value of the 32-bit floating point literal. 
+ * + * Since: 0.12.0 + */ +gfloat +ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node) +{ + return ggandiva_literal_node_get<float>(GGANDIVA_LITERAL_NODE(node)); +} + + +G_DEFINE_TYPE(GGandivaDoubleLiteralNode, + ggandiva_double_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_double_literal_node_init(GGandivaDoubleLiteralNode *double_literal_node) +{ +} + +static void +ggandiva_double_literal_node_class_init(GGandivaDoubleLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_double_literal_node_new: + * @value: The value of the 64-bit floating point literal. + * + * Returns: A newly created #GGandivaDoubleLiteralNode. + * + * Since: 0.12.0 + */ +GGandivaDoubleLiteralNode * +ggandiva_double_literal_node_new(gdouble value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeLiteral(value); + return GGANDIVA_DOUBLE_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_double_literal_node_get_value: + * @node: A #GGandivaDoubleLiteralNode. + * + * Returns: The value of the 64-bit floating point literal. 
+ * + * Since: 0.12.0 + */ +gdouble +ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node) +{ + return ggandiva_literal_node_get<double>(GGANDIVA_LITERAL_NODE(node)); +} + + +typedef struct GGandivaBinaryLiteralNodePrivate_ { + GBytes *value; +} GGandivaBinaryLiteralNodePrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaBinaryLiteralNode, + ggandiva_binary_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +#define GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaBinaryLiteralNodePrivate *>( \ + ggandiva_binary_literal_node_get_instance_private( \ + GGANDIVA_BINARY_LITERAL_NODE(object))) + +static void +ggandiva_binary_literal_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(object); + + if (priv->value) { + g_bytes_unref(priv->value); + priv->value = nullptr; + } + + G_OBJECT_CLASS(ggandiva_binary_literal_node_parent_class)->dispose(object); +} + +static void +ggandiva_binary_literal_node_init(GGandivaBinaryLiteralNode *binary_literal_node) +{ +} + +static void +ggandiva_binary_literal_node_class_init(GGandivaBinaryLiteralNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_binary_literal_node_dispose; +} + +/** + * ggandiva_binary_literal_node_new: + * @value: (array length=size): The value of the binary literal. + * @size: The number of bytes of the value. + * + * Returns: A newly created #GGandivaBinaryLiteralNode. + * + * Since: 0.12.0 + */ +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new(const guint8 *value, + gsize size) +{ + auto gandiva_node = + gandiva::TreeExprBuilder::MakeBinaryLiteral(std::string(reinterpret_cast<const char *>(value), + size)); + return GGANDIVA_BINARY_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_binary_literal_node_new_bytes: + * @value: The value of the binary literal. + * + * Returns: A newly created #GGandivaBinaryLiteralNode. 
+ * + * Since: 0.12.0 + */ +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new_bytes(GBytes *value) +{ + size_t value_size; + auto raw_value = g_bytes_get_data(value, &value_size); + auto gandiva_node = + gandiva::TreeExprBuilder::MakeBinaryLiteral( + std::string(reinterpret_cast<const char *>(raw_value), + value_size)); + auto literal_node = ggandiva_literal_node_new_raw(&gandiva_node, + NULL); + auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(literal_node); + priv->value = value; + g_bytes_ref(priv->value); + return GGANDIVA_BINARY_LITERAL_NODE(literal_node); +} + +/** + * ggandiva_binary_literal_node_get_value: + * @node: A #GGandivaBinaryLiteralNode. + * + * Returns: (transfer none): The value of the binary literal. + * + * Since: 0.12.0 + */ +GBytes * +ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node) +{ + auto priv = GGANDIVA_BINARY_LITERAL_NODE_GET_PRIVATE(node); + if (!priv->value) { + auto value = ggandiva_literal_node_get<std::string>(GGANDIVA_LITERAL_NODE(node)); + priv->value = g_bytes_new(value.data(), value.size()); + } + + return priv->value; +} + + +G_DEFINE_TYPE(GGandivaStringLiteralNode, + ggandiva_string_literal_node, + GGANDIVA_TYPE_LITERAL_NODE) + +static void +ggandiva_string_literal_node_init(GGandivaStringLiteralNode *string_literal_node) +{ +} + +static void +ggandiva_string_literal_node_class_init(GGandivaStringLiteralNodeClass *klass) +{ +} + +/** + * ggandiva_string_literal_node_new: + * @value: The value of the UTF-8 encoded string literal. + * + * Returns: A newly created #GGandivaStringLiteralNode. + * + * Since: 0.12.0 + */ +GGandivaStringLiteralNode * +ggandiva_string_literal_node_new(const gchar *value) +{ + auto gandiva_node = gandiva::TreeExprBuilder::MakeStringLiteral(value); + return GGANDIVA_STRING_LITERAL_NODE(ggandiva_literal_node_new_raw(&gandiva_node, + NULL)); +} + +/** + * ggandiva_string_literal_node_get_value: + * @node: A #GGandivaStringLiteralNode. 
+ * + * Returns: The value of the UTF-8 encoded string literal. + * + * Since: 0.12.0 + */ +const gchar * +ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node) +{ + auto &value = ggandiva_literal_node_get<std::string>(GGANDIVA_LITERAL_NODE(node)); + return value.c_str(); +} + + +typedef struct GGandivaIfNodePrivate_ { + GGandivaNode *condition_node; + GGandivaNode *then_node; + GGandivaNode *else_node; +} GGandivaIfNodePrivate; + +enum { + PROP_CONDITION_NODE = 1, + PROP_THEN_NODE, + PROP_ELSE_NODE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaIfNode, + ggandiva_if_node, + GGANDIVA_TYPE_NODE) + +#define GGANDIVA_IF_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaIfNodePrivate *>( \ + ggandiva_if_node_get_instance_private( \ + GGANDIVA_IF_NODE(object))) + +static void +ggandiva_if_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object); + + if (priv->condition_node) { + g_object_unref(priv->condition_node); + priv->condition_node = nullptr; + } + + if (priv->then_node) { + g_object_unref(priv->then_node); + priv->then_node = nullptr; + } + + if (priv->else_node) { + g_object_unref(priv->else_node); + priv->else_node = nullptr; + } + + G_OBJECT_CLASS(ggandiva_if_node_parent_class)->dispose(object); +} + +static void +ggandiva_if_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONDITION_NODE: + priv->condition_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + case PROP_THEN_NODE: + priv->then_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + case PROP_ELSE_NODE: + priv->else_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_if_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GGANDIVA_IF_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONDITION_NODE: + g_value_set_object(value, priv->condition_node); + break; + case PROP_THEN_NODE: + g_value_set_object(value, priv->then_node); + break; + case PROP_ELSE_NODE: + g_value_set_object(value, priv->else_node); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_if_node_init(GGandivaIfNode *if_node) +{ +} + +static void +ggandiva_if_node_class_init(GGandivaIfNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_if_node_dispose; + gobject_class->set_property = ggandiva_if_node_set_property; + gobject_class->get_property = ggandiva_if_node_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("condition-node", + "Condition node", + "The condition node", + GGANDIVA_TYPE_NODE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CONDITION_NODE, spec); + + spec = g_param_spec_object("then-node", + "Then node", + "The then node", + GGANDIVA_TYPE_NODE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_THEN_NODE, spec); + + spec = g_param_spec_object("else-node", + "Else node", + "The else node", + GGANDIVA_TYPE_NODE, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ELSE_NODE, spec); +} + +/** + * ggandiva_if_node_new: + * @condition_node: the node with the condition for if-else expression. + * @then_node: the node in case the condition node is true. + * @else_node: the node in case the condition node is false. + * @return_type: A #GArrowDataType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaIfNode or %NULL on error. 
+ * + * Since: 0.12.0 + */ +GGandivaIfNode * +ggandiva_if_node_new(GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type, + GError **error) +{ + if (!condition_node || !then_node || !else_node || !return_type) { + /* TODO: Improve error message to show which arguments are invalid. */ + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][if-literal-node][new] " + "all arguments must not NULL"); + return NULL; + } + auto gandiva_condition_node = ggandiva_node_get_raw(condition_node); + auto gandiva_then_node = ggandiva_node_get_raw(then_node); + auto gandiva_else_node = ggandiva_node_get_raw(else_node); + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto gandiva_node = gandiva::TreeExprBuilder::MakeIf(gandiva_condition_node, + gandiva_then_node, + gandiva_else_node, + arrow_return_type); + if (!gandiva_node) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][if-literal-node][new] " + "failed to create: if (<%s>) {<%s>} else {<%s>} -> <%s>", + gandiva_condition_node->ToString().c_str(), + gandiva_then_node->ToString().c_str(), + gandiva_else_node->ToString().c_str(), + arrow_return_type->ToString().c_str()); + return NULL; + } + return ggandiva_if_node_new_raw(&gandiva_node, + condition_node, + then_node, + else_node, + return_type); +} + + +typedef struct GGandivaBooleanNodePrivate_ { + GList *children; +} GGandivaBooleanNodePrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaBooleanNode, + ggandiva_boolean_node, + GGANDIVA_TYPE_NODE) + +#define GGANDIVA_BOOLEAN_NODE_GET_PRIVATE(object) \ + static_cast<GGandivaBooleanNodePrivate *>( \ + ggandiva_boolean_node_get_instance_private( \ + GGANDIVA_BOOLEAN_NODE(object))) \ + +static void +ggandiva_boolean_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_BOOLEAN_NODE_GET_PRIVATE(object); + + if (priv->children) { + g_list_free_full(priv->children, g_object_unref); + priv->children = nullptr; + } + 
+ G_OBJECT_CLASS(ggandiva_boolean_node_parent_class)->dispose(object); +} + +static void +ggandiva_boolean_node_init(GGandivaBooleanNode *boolean_node) +{ + auto priv = GGANDIVA_BOOLEAN_NODE_GET_PRIVATE(boolean_node); + priv->children = nullptr; +} + +static void +ggandiva_boolean_node_class_init(GGandivaBooleanNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_boolean_node_dispose; +} + +/** + * ggandiva_boolean_node_get_children: + * @node: A #GGandivaBooleanNode. + * + * Returns: (transfer none) (element-type GGandivaNode): + * The children of the boolean node. + * + * Since: 0.17.0 + */ +GList * +ggandiva_boolean_node_get_children(GGandivaBooleanNode *node) +{ + auto priv = GGANDIVA_BOOLEAN_NODE_GET_PRIVATE(node); + return priv->children; +} + + +G_DEFINE_TYPE(GGandivaAndNode, + ggandiva_and_node, + GGANDIVA_TYPE_BOOLEAN_NODE) + +static void +ggandiva_and_node_init(GGandivaAndNode *and_node) +{ +} + +static void +ggandiva_and_node_class_init(GGandivaAndNodeClass *klass) +{ +} + +/** + * ggandiva_and_node_new: + * @children: (element-type GGandivaNode): The children of the AND node. + * + * Returns: A newly created #GGandivaAndNode for the AND expression. 
+ * + * Since: 0.17.0 + */ +GGandivaAndNode * +ggandiva_and_node_new(GList *children) +{ + std::vector<std::shared_ptr<gandiva::Node>> gandiva_nodes; + for (auto node = children; node; node = g_list_next(node)) { + auto gandiva_node = ggandiva_node_get_raw(GGANDIVA_NODE(node->data)); + gandiva_nodes.push_back(gandiva_node); + } + auto gandiva_node = gandiva::TreeExprBuilder::MakeAnd(gandiva_nodes); + return GGANDIVA_AND_NODE(ggandiva_boolean_node_new_raw(&gandiva_node, + children)); +} + + +G_DEFINE_TYPE(GGandivaOrNode, + ggandiva_or_node, + GGANDIVA_TYPE_BOOLEAN_NODE) + +static void +ggandiva_or_node_init(GGandivaOrNode *or_node) +{ +} + +static void +ggandiva_or_node_class_init(GGandivaOrNodeClass *klass) +{ +} + +/** + * ggandiva_or_node_new: + * @children: (element-type GGandivaNode): The children of the OR node. + * + * Returns: A newly created #GGandivaOrNode for the OR expression. + * + * Since: 0.17.0 + */ +GGandivaOrNode * +ggandiva_or_node_new(GList *children) +{ + std::vector<std::shared_ptr<gandiva::Node>> gandiva_nodes; + for (auto node = children; node; node = g_list_next(node)) { + auto gandiva_node = ggandiva_node_get_raw(GGANDIVA_NODE(node->data)); + gandiva_nodes.push_back(gandiva_node); + } + auto gandiva_node = gandiva::TreeExprBuilder::MakeOr(gandiva_nodes); + return GGANDIVA_OR_NODE(ggandiva_boolean_node_new_raw(&gandiva_node, + children)); +} + +G_END_DECLS + +std::shared_ptr<gandiva::Node> +ggandiva_node_get_raw(GGandivaNode *node) +{ + auto priv = GGANDIVA_NODE_GET_PRIVATE(node); + return priv->node; +} + +GGandivaFieldNode * +ggandiva_field_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GArrowField *field) +{ + auto arrow_return_type = (*gandiva_node)->return_type(); + auto return_type = garrow_field_get_data_type(field); + auto field_node = g_object_new(GGANDIVA_TYPE_FIELD_NODE, + "node", gandiva_node, + "field", field, + "return-type", return_type, + NULL); + return GGANDIVA_FIELD_NODE(field_node); +} + 
+GGandivaFunctionNode * +ggandiva_function_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + const gchar *name, + GList *parameters, + GArrowDataType *return_type) +{ + auto function_node = g_object_new(GGANDIVA_TYPE_FUNCTION_NODE, + "node", gandiva_node, + "name", name, + "return-type", return_type, + NULL); + auto priv = GGANDIVA_FUNCTION_NODE_GET_PRIVATE(function_node); + for (auto node = parameters; node; node = g_list_next(node)) { + auto parameter = GGANDIVA_NODE(node->data); + priv->parameters = g_list_prepend(priv->parameters, g_object_ref(parameter)); + } + priv->parameters = g_list_reverse(priv->parameters); + return GGANDIVA_FUNCTION_NODE(function_node); +} + +GGandivaLiteralNode * +ggandiva_literal_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GArrowDataType *return_type) +{ + auto gandiva_literal_node = + std::static_pointer_cast<gandiva::LiteralNode>(*gandiva_node); + + GGandivaLiteralNode *literal_node; + if (gandiva_literal_node->is_null()) { + literal_node = + GGANDIVA_LITERAL_NODE(g_object_new(GGANDIVA_TYPE_NULL_LITERAL_NODE, + "node", gandiva_node, + "return-type", return_type, + NULL)); + } else { + GType type; + + auto arrow_return_type = gandiva_literal_node->return_type(); + switch (arrow_return_type->id()) { + case arrow::Type::BOOL: + type = GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE; + break; + case arrow::Type::type::UINT8: + type = GGANDIVA_TYPE_UINT8_LITERAL_NODE; + break; + case arrow::Type::type::UINT16: + type = GGANDIVA_TYPE_UINT16_LITERAL_NODE; + break; + case arrow::Type::type::UINT32: + type = GGANDIVA_TYPE_UINT32_LITERAL_NODE; + break; + case arrow::Type::type::UINT64: + type = GGANDIVA_TYPE_UINT64_LITERAL_NODE; + break; + case arrow::Type::type::INT8: + type = GGANDIVA_TYPE_INT8_LITERAL_NODE; + break; + case arrow::Type::type::INT16: + type = GGANDIVA_TYPE_INT16_LITERAL_NODE; + break; + case arrow::Type::type::INT32: + type = GGANDIVA_TYPE_INT32_LITERAL_NODE; + break; + case arrow::Type::type::INT64: + type = 
GGANDIVA_TYPE_INT64_LITERAL_NODE; + break; + case arrow::Type::type::FLOAT: + type = GGANDIVA_TYPE_FLOAT_LITERAL_NODE; + break; + case arrow::Type::type::DOUBLE: + type = GGANDIVA_TYPE_DOUBLE_LITERAL_NODE; + break; + case arrow::Type::type::STRING: + type = GGANDIVA_TYPE_STRING_LITERAL_NODE; + break; + case arrow::Type::type::BINARY: + type = GGANDIVA_TYPE_BINARY_LITERAL_NODE; + break; + default: + type = GGANDIVA_TYPE_LITERAL_NODE; + break; + } + + if (return_type) { + literal_node = + GGANDIVA_LITERAL_NODE(g_object_new(type, + "node", gandiva_node, + "return-type", return_type, + NULL)); + } else { + return_type = garrow_data_type_new_raw(&arrow_return_type); + literal_node = + GGANDIVA_LITERAL_NODE(g_object_new(type, + "node", gandiva_node, + "return-type", return_type, + NULL)); + g_object_unref(return_type); + } + } + + return literal_node; +} + +GGandivaIfNode * +ggandiva_if_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type) +{ + auto if_node = g_object_new(GGANDIVA_TYPE_IF_NODE, + "node", gandiva_node, + "condition-node", condition_node, + "then-node", then_node, + "else-node", else_node, + "return-type", return_type, + NULL); + return GGANDIVA_IF_NODE(if_node); +} + +GGandivaBooleanNode * +ggandiva_boolean_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GList *children) +{ + auto gandiva_boolean_node = + std::static_pointer_cast<gandiva::BooleanNode>(*gandiva_node); + + GType type; + if (gandiva_boolean_node->expr_type() == gandiva::BooleanNode::AND) { + type = GGANDIVA_TYPE_AND_NODE; + } else { + type = GGANDIVA_TYPE_OR_NODE; + } + auto boolean_node = g_object_new(type, + "node", gandiva_node, + NULL); + auto priv = GGANDIVA_BOOLEAN_NODE_GET_PRIVATE(boolean_node); + priv->children = g_list_copy_deep(children, + reinterpret_cast<GCopyFunc>(g_object_ref), + NULL); + return GGANDIVA_BOOLEAN_NODE(boolean_node); +} diff --git 
a/src/arrow/c_glib/gandiva-glib/node.h b/src/arrow/c_glib/gandiva-glib/node.h new file mode 100644 index 000000000..a16f26c65 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/node.h @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <gandiva-glib/version.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_NODE (ggandiva_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaNode, + ggandiva_node, + GGANDIVA, + NODE, + GObject) + +struct _GGandivaNodeClass +{ + GObjectClass parent_class; +}; + +gchar *ggandiva_node_to_string(GGandivaNode *node); + + +#define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFieldNode, + ggandiva_field_node, + GGANDIVA, + FIELD_NODE, + GGandivaNode) +struct _GGandivaFieldNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGandivaFieldNode *ggandiva_field_node_new(GArrowField *field); + + +#define GGANDIVA_TYPE_FUNCTION_NODE (ggandiva_function_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionNode, + ggandiva_function_node, + GGANDIVA, + FUNCTION_NODE, + GGandivaNode) +struct _GGandivaFunctionNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGandivaFunctionNode * +ggandiva_function_node_new(const gchar *name, + GList *parameters, + GArrowDataType *return_type); +GList * +ggandiva_function_node_get_parameters(GGandivaFunctionNode *node); + + +#define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaLiteralNode, + ggandiva_literal_node, + GGANDIVA, + LITERAL_NODE, + GGandivaNode) +struct _GGandivaLiteralNodeClass +{ + GGandivaNodeClass parent_class; +}; + + +#define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode, + ggandiva_null_literal_node, + GGANDIVA, + NULL_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaNullLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaNullLiteralNode * +ggandiva_null_literal_node_new(GArrowDataType *return_type, + GError **error); + + +#define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode, + ggandiva_boolean_literal_node, + GGANDIVA, + BOOLEAN_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaBooleanLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaBooleanLiteralNode * +ggandiva_boolean_literal_node_new(gboolean value); +gboolean +ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); + + +#define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, + ggandiva_int8_literal_node, + GGANDIVA, + INT8_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt8LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt8LiteralNode * +ggandiva_int8_literal_node_new(gint8 value); +gint8 +ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, + ggandiva_uint8_literal_node, + GGANDIVA, + UINT8_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt8LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt8LiteralNode * +ggandiva_uint8_literal_node_new(guint8 value); +guint8 +ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); + + +#define GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, + ggandiva_int16_literal_node, + GGANDIVA, + INT16_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt16LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt16LiteralNode * +ggandiva_int16_literal_node_new(gint16 value); +gint16 +ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, + ggandiva_uint16_literal_node, + GGANDIVA, + 
UINT16_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt16LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt16LiteralNode * +ggandiva_uint16_literal_node_new(guint16 value); +guint16 +ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); + + +#define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, + ggandiva_int32_literal_node, + GGANDIVA, + INT32_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt32LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt32LiteralNode * +ggandiva_int32_literal_node_new(gint32 value); +gint32 +ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, + ggandiva_uint32_literal_node, + GGANDIVA, + UINT32_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt32LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt32LiteralNode * +ggandiva_uint32_literal_node_new(guint32 value); +guint32 +ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); + + +#define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, + ggandiva_int64_literal_node, + GGANDIVA, + INT64_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaInt64LiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaInt64LiteralNode * +ggandiva_int64_literal_node_new(gint64 value); +gint64 +ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); + + +#define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, + ggandiva_uint64_literal_node, + GGANDIVA, + UINT64_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaUInt64LiteralNodeClass +{ 
+ GGandivaLiteralNodeClass parent_class; +}; + +GGandivaUInt64LiteralNode * +ggandiva_uint64_literal_node_new(guint64 value); +guint64 +ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); + + +#define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, + ggandiva_float_literal_node, + GGANDIVA, + FLOAT_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaFloatLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaFloatLiteralNode * +ggandiva_float_literal_node_new(gfloat value); +gfloat +ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); + + +#define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, + ggandiva_double_literal_node, + GGANDIVA, + DOUBLE_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaDoubleLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaDoubleLiteralNode * +ggandiva_double_literal_node_new(gdouble value); +gdouble +ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); + + +#define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, + ggandiva_binary_literal_node, + GGANDIVA, + BINARY_LITERAL_NODE, + GGandivaLiteralNode) +struct _GGandivaBinaryLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new(const guint8 *value, + gsize size); +GGandivaBinaryLiteralNode * +ggandiva_binary_literal_node_new_bytes(GBytes *value); +GBytes * +ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); + + +#define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, + ggandiva_string_literal_node, + GGANDIVA, + STRING_LITERAL_NODE, + GGandivaLiteralNode) +struct 
_GGandivaStringLiteralNodeClass +{ + GGandivaLiteralNodeClass parent_class; +}; + +GGandivaStringLiteralNode * +ggandiva_string_literal_node_new(const gchar *value); +const gchar * +ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); + + +#define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaIfNode, + ggandiva_if_node, + GGANDIVA, + IF_NODE, + GGandivaNode) +struct _GGandivaIfNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGandivaIfNode * +ggandiva_if_node_new(GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type, + GError **error); + + +#define GGANDIVA_TYPE_BOOLEAN_NODE (ggandiva_boolean_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanNode, + ggandiva_boolean_node, + GGANDIVA, + BOOLEAN_NODE, + GGandivaNode) + +struct _GGandivaBooleanNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_0_17 +GList * +ggandiva_boolean_node_get_children(GGandivaBooleanNode *node); + + +#define GGANDIVA_TYPE_AND_NODE (ggandiva_and_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaAndNode, + ggandiva_and_node, + GGANDIVA, + AND_NODE, + GGandivaBooleanNode) +struct _GGandivaAndNodeClass +{ + GGandivaBooleanNodeClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_0_17 +GGandivaAndNode * +ggandiva_and_node_new(GList *children); + + +#define GGANDIVA_TYPE_OR_NODE (ggandiva_or_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaOrNode, + ggandiva_or_node, + GGANDIVA, + OR_NODE, + GGandivaBooleanNode) +struct _GGandivaOrNodeClass +{ + GGandivaBooleanNodeClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_0_17 +GGandivaOrNode * +ggandiva_or_node_new(GList *children); + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/node.hpp b/src/arrow/c_glib/gandiva-glib/node.hpp new file mode 100644 index 000000000..51dc2cbbf --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/node.hpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <memory> + +#include <gandiva/node.h> +#include <gandiva/tree_expr_builder.h> + +#include <gandiva-glib/node.h> + +std::shared_ptr<gandiva::Node> ggandiva_node_get_raw(GGandivaNode *node); +GGandivaFieldNode * +ggandiva_field_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GArrowField *field); +GGandivaFunctionNode * +ggandiva_function_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + const gchar *name, + GList *parameters, + GArrowDataType *return_type); +GGandivaLiteralNode * +ggandiva_literal_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GArrowDataType *return_type); +GGandivaIfNode * +ggandiva_if_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type); +GGandivaBooleanNode * +ggandiva_boolean_node_new_raw(std::shared_ptr<gandiva::Node> *gandiva_node, + GList *children); diff --git a/src/arrow/c_glib/gandiva-glib/projector.cpp b/src/arrow/c_glib/gandiva-glib/projector.cpp new file mode 100644 index 000000000..c91cde84c --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/projector.cpp @@ -0,0 +1,391 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/error.hpp> +#include <arrow-glib/record-batch.hpp> +#include <arrow-glib/schema.hpp> + +#include <gandiva-glib/expression.hpp> +#include <gandiva-glib/projector.hpp> +#include <gandiva-glib/selection-vector.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: projector + * @title: Projector classes + * @include: gandiva-glib/gandiva-glib.h + * + * #GGandivaProjector is a class that evaluates given expressions + * against the given record batches. + * + * #GGandivaSelectableProjector is a class that evaluates given expressions + * against the given selected records in the given record batches. 
+ * + * Since: 0.12.0 + */ + +typedef struct GGandivaProjectorPrivate_ { + std::shared_ptr<gandiva::Projector> projector; + GArrowSchema *schema; + GList *expressions; +} GGandivaProjectorPrivate; + +enum { + PROP_PROJECTOR = 1, + PROP_SCHEMA, + PROP_EXPRESSIONS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaProjector, + ggandiva_projector, + G_TYPE_OBJECT) + +#define GGANDIVA_PROJECTOR_GET_PRIVATE(obj) \ + static_cast<GGandivaProjectorPrivate *>( \ + ggandiva_projector_get_instance_private( \ + GGANDIVA_PROJECTOR(obj))) + +static void +ggandiva_projector_dispose(GObject *object) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(object); + + if (priv->schema) { + g_object_unref(G_OBJECT(priv->schema)); + priv->schema = nullptr; + } + + g_list_free_full(priv->expressions, g_object_unref); + priv->expressions = nullptr; + + G_OBJECT_CLASS(ggandiva_projector_parent_class)->dispose(object); +} + +static void +ggandiva_projector_finalize(GObject *object) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(object); + + priv->projector.~shared_ptr(); + + G_OBJECT_CLASS(ggandiva_projector_parent_class)->finalize(object); +} + +static void +ggandiva_projector_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_PROJECTOR: + priv->projector = + *static_cast<std::shared_ptr<gandiva::Projector> *>(g_value_get_pointer(value)); + break; + case PROP_SCHEMA: + priv->schema = GARROW_SCHEMA(g_value_dup_object(value)); + break; + case PROP_EXPRESSIONS: + priv->expressions = + g_list_copy_deep(static_cast<GList *>(g_value_get_pointer(value)), + reinterpret_cast<GCopyFunc>(g_object_ref), + nullptr); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_projector_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(object); 
+ + switch (prop_id) { + case PROP_SCHEMA: + g_value_set_object(value, priv->schema); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_projector_init(GGandivaProjector *object) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(object); + new(&priv->projector) std::shared_ptr<gandiva::Projector>; +} + +static void +ggandiva_projector_class_init(GGandivaProjectorClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_projector_dispose; + gobject_class->finalize = ggandiva_projector_finalize; + gobject_class->set_property = ggandiva_projector_set_property; + gobject_class->get_property = ggandiva_projector_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("projector", + "Projector", + "The raw std::shared<gandiva::Projector> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_PROJECTOR, spec); + + spec = g_param_spec_object("schema", + "Schema", + "The schema of the projector", + GARROW_TYPE_SCHEMA, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SCHEMA, spec); + + spec = g_param_spec_pointer("expressions", + "Expressions", + "The expressions for the projector", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_EXPRESSIONS, spec); +} + +/** + * ggandiva_projector_new: + * @schema: A #GArrowSchema. + * @expressions: (element-type GGandivaExpression): The built expressions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaProjector on success, + * %NULL on error. 
+ *
+ * Since: 0.12.0
+ */
+GGandivaProjector *
+ggandiva_projector_new(GArrowSchema *schema,
+                       GList *expressions,
+                       GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Unwrap every GGandivaExpression into its raw gandiva counterpart. */
+  std::vector<std::shared_ptr<gandiva::Expression>> gandiva_expressions;
+  for (auto node = expressions; node; node = g_list_next(node)) {
+    auto expression = GGANDIVA_EXPRESSION(node->data);
+    auto gandiva_expression = ggandiva_expression_get_raw(expression);
+    gandiva_expressions.push_back(gandiva_expression);
+  }
+
+  /* Call the factory through the class name. The result pointer is still
+   * empty at this point, so going through gandiva_projector-> would
+   * dereference an empty shared_ptr just to name the function. */
+  std::shared_ptr<gandiva::Projector> gandiva_projector;
+  auto status =
+    gandiva::Projector::Make(arrow_schema,
+                             gandiva_expressions,
+                             &gandiva_projector);
+  if (garrow_error_check(error, status, "[gandiva][projector][new]")) {
+    return ggandiva_projector_new_raw(&gandiva_projector,
+                                      schema,
+                                      expressions);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * ggandiva_projector_evaluate:
+ * @projector: A #GGandivaProjector.
+ * @record_batch: A #GArrowRecordBatch.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (element-type GArrowArray) (nullable) (transfer full):
+ *   The #GArrowArray as the result evaluated on success, %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GList *
+ggandiva_projector_evaluate(GGandivaProjector *projector,
+                            GArrowRecordBatch *record_batch,
+                            GError **error)
+{
+  auto gandiva_projector = ggandiva_projector_get_raw(projector);
+  auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  auto memory_pool = arrow::default_memory_pool();
+  arrow::ArrayVector arrow_arrays;
+  auto status =
+    gandiva_projector->Evaluate(*arrow_record_batch,
+                                memory_pool,
+                                &arrow_arrays);
+  if (garrow_error_check(error, status, "[gandiva][projector][evaluate]")) {
+    GList *arrays = NULL;
+    /* Iterate by reference: the wrapper only needs the address of the
+     * shared_ptr, so a per-element copy is unnecessary. */
+    for (auto &arrow_array : arrow_arrays) {
+      auto array = garrow_array_new_raw(&arrow_array);
+      /* Prepend, then reverse once below, to keep the original order. */
+      arrays = g_list_prepend(arrays, array);
+    }
+    return g_list_reverse(arrays);
+  } else {
+    return NULL;
+  }
+}
+
+
+/* GGandivaSelectableProjector adds no state of its own; it reuses the
+ * properties and private data of GGandivaProjector. */
+G_DEFINE_TYPE(GGandivaSelectableProjector,
+              ggandiva_selectable_projector,
+              GGANDIVA_TYPE_PROJECTOR)
+
+static void
+ggandiva_selectable_projector_init(GGandivaSelectableProjector *object)
+{
+}
+
+static void
+ggandiva_selectable_projector_class_init(GGandivaSelectableProjectorClass *klass)
+{
+}
+
+/**
+ * ggandiva_selectable_projector_new:
+ * @schema: A #GArrowSchema.
+ * @expressions: (element-type GGandivaExpression): The built expressions.
+ * @mode: A #GGandivaSelectionVectorMode to be used.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GGandivaSelectableProjector
+ *   on success, %NULL on error.
+ *
+ * Since: 4.0.0
+ */
+GGandivaSelectableProjector *
+ggandiva_selectable_projector_new(GArrowSchema *schema,
+                                  GList *expressions,
+                                  GGandivaSelectionVectorMode mode,
+                                  GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+
+  /* Unwrap every GGandivaExpression into its raw gandiva counterpart. */
+  std::vector<std::shared_ptr<gandiva::Expression>> gandiva_expressions;
+  for (auto node = expressions; node; node = g_list_next(node)) {
+    auto expression = GGANDIVA_EXPRESSION(node->data);
+    auto gandiva_expression = ggandiva_expression_get_raw(expression);
+    gandiva_expressions.push_back(gandiva_expression);
+  }
+
+  auto gandiva_mode = static_cast<gandiva::SelectionVector::Mode>(mode);
+  auto gandiva_configuration =
+    gandiva::ConfigurationBuilder::DefaultConfiguration();
+
+  /* Call the factory through the class name. The result pointer is still
+   * empty at this point, so going through gandiva_projector-> would
+   * dereference an empty shared_ptr just to name the function. */
+  std::shared_ptr<gandiva::Projector> gandiva_projector;
+  auto status = gandiva::Projector::Make(arrow_schema,
+                                         gandiva_expressions,
+                                         gandiva_mode,
+                                         gandiva_configuration,
+                                         &gandiva_projector);
+  if (garrow_error_check(error,
+                         status,
+                         "[gandiva][selectable-projector][new]")) {
+    return ggandiva_selectable_projector_new_raw(&gandiva_projector,
+                                                 schema,
+                                                 expressions);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * ggandiva_selectable_projector_evaluate:
+ * @projector: A #GGandivaSelectableProjector.
+ * @record_batch: A #GArrowRecordBatch.
+ * @selection_vector: A #GGandivaSelectionVector that specifies
+ *   the filtered row positions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (element-type GArrowArray) (nullable) (transfer full):
+ *   The #GArrowArray as the result evaluated on success, %NULL on error.
+ * + * Since: 4.0.0 + */ +GList * +ggandiva_selectable_projector_evaluate( + GGandivaSelectableProjector *projector, + GArrowRecordBatch *record_batch, + GGandivaSelectionVector *selection_vector, + GError **error) +{ + auto gandiva_projector = + ggandiva_projector_get_raw(GGANDIVA_PROJECTOR(projector)); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto gandiva_selection_vector = + ggandiva_selection_vector_get_raw(selection_vector).get(); + auto memory_pool = arrow::default_memory_pool(); + arrow::ArrayVector arrow_arrays; + auto status = + gandiva_projector->Evaluate(*arrow_record_batch, + gandiva_selection_vector, + memory_pool, + &arrow_arrays); + if (garrow_error_check(error, + status, + "[gandiva][selectable-projector][evaluate]")) { + GList *arrays = NULL; + for (auto arrow_array : arrow_arrays) { + auto array = garrow_array_new_raw(&arrow_array); + arrays = g_list_prepend(arrays, array); + } + return g_list_reverse(arrays); + } else { + return NULL; + } +} + +G_END_DECLS + +GGandivaProjector * +ggandiva_projector_new_raw( + std::shared_ptr<gandiva::Projector> *gandiva_projector, + GArrowSchema *schema, + GList *expressions) +{ + auto projector = g_object_new(GGANDIVA_TYPE_PROJECTOR, + "projector", gandiva_projector, + "schema", schema, + "expressions", expressions, + NULL); + return GGANDIVA_PROJECTOR(projector); +} + +GGandivaSelectableProjector * +ggandiva_selectable_projector_new_raw( + std::shared_ptr<gandiva::Projector> *gandiva_projector, + GArrowSchema *schema, + GList *expressions) +{ + auto projector = g_object_new(GGANDIVA_TYPE_SELECTABLE_PROJECTOR, + "projector", gandiva_projector, + NULL); + return GGANDIVA_SELECTABLE_PROJECTOR(projector); +} + +std::shared_ptr<gandiva::Projector> +ggandiva_projector_get_raw(GGandivaProjector *projector) +{ + auto priv = GGANDIVA_PROJECTOR_GET_PRIVATE(projector); + return priv->projector; +} diff --git a/src/arrow/c_glib/gandiva-glib/projector.h 
b/src/arrow/c_glib/gandiva-glib/projector.h new file mode 100644 index 000000000..5dd218b80 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/projector.h @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <gandiva-glib/selection-vector.h> + +G_BEGIN_DECLS + +#define GGANDIVA_TYPE_PROJECTOR (ggandiva_projector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaProjector, + ggandiva_projector, + GGANDIVA, + PROJECTOR, + GObject) + +struct _GGandivaProjectorClass +{ + GObjectClass parent_class; +}; + +GGandivaProjector * +ggandiva_projector_new(GArrowSchema *schema, + GList *expressions, + GError **error); +GList * +ggandiva_projector_evaluate(GGandivaProjector *projector, + GArrowRecordBatch *record_batch, + GError **error); + + +#define GGANDIVA_TYPE_SELECTABLE_PROJECTOR \ + (ggandiva_selectable_projector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaSelectableProjector, + ggandiva_selectable_projector, + GGANDIVA, + SELECTABLE_PROJECTOR, + GGandivaProjector) + +struct _GGandivaSelectableProjectorClass +{ + GGandivaProjectorClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaSelectableProjector * +ggandiva_selectable_projector_new(GArrowSchema *schema, + GList 
*expressions, + GGandivaSelectionVectorMode mode, + GError **error); +GGANDIVA_AVAILABLE_IN_4_0 +GList * +ggandiva_selectable_projector_evaluate(GGandivaSelectableProjector *projector, + GArrowRecordBatch *record_batch, + GGandivaSelectionVector *selection_vector, + GError **error); + + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/projector.hpp b/src/arrow/c_glib/gandiva-glib/projector.hpp new file mode 100644 index 000000000..b372f32f5 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/projector.hpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <memory> + +#include <gandiva/projector.h> + +#include <gandiva-glib/projector.h> + +GGandivaProjector * +ggandiva_projector_new_raw( + std::shared_ptr<gandiva::Projector> *gandiva_projector, + GArrowSchema *schema, + GList *expressions); +GGandivaSelectableProjector * +ggandiva_selectable_projector_new_raw( + std::shared_ptr<gandiva::Projector> *gandiva_projector, + GArrowSchema *schema, + GList *expressions); +std::shared_ptr<gandiva::Projector> +ggandiva_projector_get_raw(GGandivaProjector *projector); diff --git a/src/arrow/c_glib/gandiva-glib/selection-vector.cpp b/src/arrow/c_glib/gandiva-glib/selection-vector.cpp new file mode 100644 index 000000000..77c3cf2aa --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/selection-vector.cpp @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/basic-array.hpp> +#include <arrow-glib/error.hpp> + +#include <gandiva-glib/selection-vector.hpp> + + +G_BEGIN_DECLS + +/** + * SECTION: selection-vector + * @section_id: selection-vector-classes + * @title: Selection vector classes + * @include: gandiva-glib/gandiva-glib.h + * + * #GGandivaSelectionVector is a base class for a selection vector. 
+ * + * #GGandivaUInt16SelectionVector is a class for a selection vector + * that uses 16-bit unsigned integer for each index. + * + * #GGandivaUInt32SelectionVector is a class for a selection vector + * that uses 32-bit unsigned integer for each index. + * + * #GGandivaUInt64SelectionVector is a class for a selection vector + * that uses 64-bit unsigned integer for each index. + * + * Since: 4.0.0 + */ + +typedef struct GGandivaSelectionVectorPrivate_ { + std::shared_ptr<gandiva::SelectionVector> selection_vector; +} GGandivaSelectionVectorPrivate; + +enum { + PROP_SELECTION_VECTOR = 1, +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GGandivaSelectionVector, + ggandiva_selection_vector, + G_TYPE_OBJECT) + +#define GGANDIVA_SELECTION_VECTOR_GET_PRIVATE(object) \ + static_cast<GGandivaSelectionVectorPrivate *>( \ + ggandiva_selection_vector_get_instance_private( \ + GGANDIVA_SELECTION_VECTOR(object))) + +static void +ggandiva_selection_vector_finalize(GObject *object) +{ + auto priv = GGANDIVA_SELECTION_VECTOR_GET_PRIVATE(object); + + priv->selection_vector.~shared_ptr(); + + G_OBJECT_CLASS(ggandiva_selection_vector_parent_class)->finalize(object); +} + +static void +ggandiva_selection_vector_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_SELECTION_VECTOR_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_SELECTION_VECTOR: + priv->selection_vector = + *static_cast<std::shared_ptr<gandiva::SelectionVector> *>( + g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_selection_vector_init(GGandivaSelectionVector *object) +{ + auto priv = GGANDIVA_SELECTION_VECTOR_GET_PRIVATE(object); + new(&priv->selection_vector) std::shared_ptr<gandiva::SelectionVector>; +} + +static void +ggandiva_selection_vector_class_init(GGandivaSelectionVectorClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + 
gobject_class->finalize = ggandiva_selection_vector_finalize; + gobject_class->set_property = ggandiva_selection_vector_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("selection-vector", + "Selection vector", + "The raw std::shared<gandiva::SelectionVector> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_SELECTION_VECTOR, spec); +} + +/** + * ggandiva_selection_vector_get_mode: + * @selection_vector: A #GGandivaSelectionVector. + * + * Returns: A #GGandivaSelectionVectorMode for the selection vector. + * + * Since: 4.0.0 + */ +GGandivaSelectionVectorMode +ggandiva_selection_vector_get_mode(GGandivaSelectionVector *selection_vector) +{ + auto gandiva_selection_vector = + ggandiva_selection_vector_get_raw(selection_vector); + auto gandiva_mode = gandiva_selection_vector->GetMode(); + return static_cast<GGandivaSelectionVectorMode>(gandiva_mode); +} + +/** + * ggandiva_selection_vector_to_array: + * @selection_vector: A #GGandivaSelectionVector. + * + * Returns: (transfer full): A #GArrowArray that has the same content + * of the selection vector. + * + * Since: 4.0.0 + */ +GArrowArray * +ggandiva_selection_vector_to_array(GGandivaSelectionVector *selection_vector) +{ + auto gandiva_selection_vector = + ggandiva_selection_vector_get_raw(selection_vector); + auto arrow_array = gandiva_selection_vector->ToArray(); + return garrow_array_new_raw(&arrow_array); +} + + +G_DEFINE_TYPE(GGandivaUInt16SelectionVector, + ggandiva_uint16_selection_vector, + GGANDIVA_TYPE_SELECTION_VECTOR) + +static void +ggandiva_uint16_selection_vector_init( + GGandivaUInt16SelectionVector *selection_vector) +{ +} + +static void +ggandiva_uint16_selection_vector_class_init( + GGandivaUInt16SelectionVectorClass *klass) +{ +} + +/** + * ggandiva_uint16_selection_vector_new: + * @max_slots: The max number of slots. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: A newly created #GGandivaUInt16SelectionVector. + * + * Since: 4.0.0 + */ +GGandivaUInt16SelectionVector * +ggandiva_uint16_selection_vector_new(gint64 max_slots, + GError **error) +{ + auto memory_pool = arrow::default_memory_pool(); + std::shared_ptr<gandiva::SelectionVector> gandiva_selection_vector; + auto status = gandiva::SelectionVector::MakeInt16(max_slots, + memory_pool, + &gandiva_selection_vector); + if (garrow_error_check(error, + status, + "[gandiva][uint16-selection-vector][new]")) { + return GGANDIVA_UINT16_SELECTION_VECTOR( + ggandiva_selection_vector_new_raw(&gandiva_selection_vector)); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GGandivaUInt32SelectionVector, + ggandiva_uint32_selection_vector, + GGANDIVA_TYPE_SELECTION_VECTOR) + +static void +ggandiva_uint32_selection_vector_init( + GGandivaUInt32SelectionVector *selection_vector) +{ +} + +static void +ggandiva_uint32_selection_vector_class_init( + GGandivaUInt32SelectionVectorClass *klass) +{ +} + +/** + * ggandiva_uint32_selection_vector_new: + * @max_slots: The max number of slots. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GGandivaUInt32SelectionVector. 
+ * + * Since: 4.0.0 + */ +GGandivaUInt32SelectionVector * +ggandiva_uint32_selection_vector_new(gint64 max_slots, + GError **error) +{ + auto memory_pool = arrow::default_memory_pool(); + std::shared_ptr<gandiva::SelectionVector> gandiva_selection_vector; + auto status = gandiva::SelectionVector::MakeInt32(max_slots, + memory_pool, + &gandiva_selection_vector); + if (garrow_error_check(error, + status, + "[gandiva][uint32-selection-vector][new]")) { + return GGANDIVA_UINT32_SELECTION_VECTOR( + ggandiva_selection_vector_new_raw(&gandiva_selection_vector)); + } else { + return NULL; + } +} + + +G_DEFINE_TYPE(GGandivaUInt64SelectionVector, + ggandiva_uint64_selection_vector, + GGANDIVA_TYPE_SELECTION_VECTOR) + +static void +ggandiva_uint64_selection_vector_init( + GGandivaUInt64SelectionVector *selection_vector) +{ +} + +static void +ggandiva_uint64_selection_vector_class_init( + GGandivaUInt64SelectionVectorClass *klass) +{ +} + +/** + * ggandiva_uint64_selection_vector_new: + * @max_slots: The max number of slots. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GGandivaUInt64SelectionVector. 
+ * + * Since: 4.0.0 + */ +GGandivaUInt64SelectionVector * +ggandiva_uint64_selection_vector_new(gint64 max_slots, + GError **error) +{ + auto memory_pool = arrow::default_memory_pool(); + std::shared_ptr<gandiva::SelectionVector> gandiva_selection_vector; + auto status = gandiva::SelectionVector::MakeInt64(max_slots, + memory_pool, + &gandiva_selection_vector); + if (garrow_error_check(error, + status, + "[gandiva][uint64-selection-vector][new]")) { + return GGANDIVA_UINT64_SELECTION_VECTOR( + ggandiva_selection_vector_new_raw(&gandiva_selection_vector)); + } else { + return NULL; + } +} + + +G_END_DECLS + + +GGandivaSelectionVector * +ggandiva_selection_vector_new_raw( + std::shared_ptr<gandiva::SelectionVector> *gandiva_selection_vector) +{ + GType type = GGANDIVA_TYPE_SELECTION_VECTOR; + switch ((*gandiva_selection_vector)->GetMode()) { + case gandiva::SelectionVector::Mode::MODE_UINT16: + type = GGANDIVA_TYPE_UINT16_SELECTION_VECTOR; + break; + case gandiva::SelectionVector::Mode::MODE_UINT32: + type = GGANDIVA_TYPE_UINT32_SELECTION_VECTOR; + break; + case gandiva::SelectionVector::Mode::MODE_UINT64: + type = GGANDIVA_TYPE_UINT64_SELECTION_VECTOR; + break; + default: + break; + } + auto selection_vector = + g_object_new(type, + "selection-vector", gandiva_selection_vector, + NULL); + return GGANDIVA_SELECTION_VECTOR(selection_vector); +} + +std::shared_ptr<gandiva::SelectionVector> +ggandiva_selection_vector_get_raw(GGandivaSelectionVector *selection_vector) +{ + auto priv = GGANDIVA_SELECTION_VECTOR_GET_PRIVATE(selection_vector); + return priv->selection_vector; +} diff --git a/src/arrow/c_glib/gandiva-glib/selection-vector.h b/src/arrow/c_glib/gandiva-glib/selection-vector.h new file mode 100644 index 000000000..029c4cde5 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/selection-vector.h @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +#include <gandiva-glib/version.h> + +G_BEGIN_DECLS + +/** + * GGandivaSelectionVectorMode: + * @GGANDIVA_SELECTION_VECTOR_MODE_NONE: Selection vector isn't used. + * @GGANDIVA_SELECTION_VECTOR_MODE_UINT16: + * #GGandivaUInt16SelectionVector is used. + * @GGANDIVA_SELECTION_VECTOR_MODE_UINT32: + * #GGandivaUInt32SelectionVector is used. + * @GGANDIVA_SELECTION_VECTOR_MODE_UINT64: + * #GGandivaUInt64SelectionVector is used. + * + * They are corresponding to `gandiva::SelectionVector::Mode` values. 
+ * + * Since: 4.0.0 + */ +typedef enum { + GGANDIVA_SELECTION_VECTOR_MODE_NONE, + GGANDIVA_SELECTION_VECTOR_MODE_UINT16, + GGANDIVA_SELECTION_VECTOR_MODE_UINT32, + GGANDIVA_SELECTION_VECTOR_MODE_UINT64, +} GGandivaSelectionVectorMode; + + +#define GGANDIVA_TYPE_SELECTION_VECTOR (ggandiva_selection_vector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaSelectionVector, + ggandiva_selection_vector, + GGANDIVA, + SELECTION_VECTOR, + GObject) + +struct _GGandivaSelectionVectorClass +{ + GObjectClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaSelectionVectorMode +ggandiva_selection_vector_get_mode(GGandivaSelectionVector *selection_vector); + +GGANDIVA_AVAILABLE_IN_4_0 +GArrowArray * +ggandiva_selection_vector_to_array(GGandivaSelectionVector *selection_vector); + + +#define GGANDIVA_TYPE_UINT16_SELECTION_VECTOR \ + (ggandiva_uint16_selection_vector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16SelectionVector, + ggandiva_uint16_selection_vector, + GGANDIVA, + UINT16_SELECTION_VECTOR, + GGandivaSelectionVector) + +struct _GGandivaUInt16SelectionVectorClass +{ + GGandivaSelectionVectorClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaUInt16SelectionVector * +ggandiva_uint16_selection_vector_new(gint64 max_slots, + GError **error); + + +#define GGANDIVA_TYPE_UINT32_SELECTION_VECTOR \ + (ggandiva_uint32_selection_vector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32SelectionVector, + ggandiva_uint32_selection_vector, + GGANDIVA, + UINT32_SELECTION_VECTOR, + GGandivaSelectionVector) + +struct _GGandivaUInt32SelectionVectorClass +{ + GGandivaSelectionVectorClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaUInt32SelectionVector * +ggandiva_uint32_selection_vector_new(gint64 max_slots, + GError **error); + + +#define GGANDIVA_TYPE_UINT64_SELECTION_VECTOR \ + (ggandiva_uint64_selection_vector_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64SelectionVector, + ggandiva_uint64_selection_vector, + GGANDIVA, + 
UINT64_SELECTION_VECTOR, + GGandivaSelectionVector) + +struct _GGandivaUInt64SelectionVectorClass +{ + GGandivaSelectionVectorClass parent_class; +}; + +GGANDIVA_AVAILABLE_IN_4_0 +GGandivaUInt64SelectionVector * +ggandiva_uint64_selection_vector_new(gint64 max_slots, + GError **error); + + +G_END_DECLS diff --git a/src/arrow/c_glib/gandiva-glib/selection-vector.hpp b/src/arrow/c_glib/gandiva-glib/selection-vector.hpp new file mode 100644 index 000000000..aec583141 --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/selection-vector.hpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <memory> + +#include <gandiva/selection_vector.h> + +#include <gandiva-glib/selection-vector.h> + +GGandivaSelectionVector * +ggandiva_selection_vector_new_raw( + std::shared_ptr<gandiva::SelectionVector> *gandiva_selection_vector); +std::shared_ptr<gandiva::SelectionVector> +ggandiva_selection_vector_get_raw(GGandivaSelectionVector *selection_vector); diff --git a/src/arrow/c_glib/gandiva-glib/version.h.in b/src/arrow/c_glib/gandiva-glib/version.h.in new file mode 100644 index 000000000..3c9e87c9d --- /dev/null +++ b/src/arrow/c_glib/gandiva-glib/version.h.in @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <glib.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: gandiva-glib/gandiva-glib.h + * + * Gandiva GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GGANDIVA_VERSION_MAJOR: + * + * The major version. + * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_MAJOR (@GGANDIVA_VERSION_MAJOR@) + +/** + * GGANDIVA_VERSION_MINOR: + * + * The minor version. 
+ * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_MINOR (@GGANDIVA_VERSION_MINOR@) + +/** + * GGANDIVA_VERSION_MICRO: + * + * The micro version. + * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_MICRO (@GGANDIVA_VERSION_MICRO@) + +/** + * GGANDIVA_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_TAG "@GGANDIVA_VERSION_TAG@" + +/** + * GGANDIVA_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * The comparison is done against %GGANDIVA_VERSION_MAJOR, + * %GGANDIVA_VERSION_MINOR and %GGANDIVA_VERSION_MICRO. + * + * Returns: %TRUE if the compile time Gandiva GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_CHECK(major, minor, micro) \ + (GGANDIVA_VERSION_MAJOR > (major) || \ + (GGANDIVA_VERSION_MAJOR == (major) && \ + GGANDIVA_VERSION_MINOR > (minor)) || \ + (GGANDIVA_VERSION_MAJOR == (major) && \ + GGANDIVA_VERSION_MINOR == (minor) && \ + GGANDIVA_VERSION_MICRO >= (micro))) + +/** + * GGANDIVA_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * gandiva-glib/gandiva-glib.h header. + * + * Since: 1.0.0 + */ + +#ifdef GGANDIVA_DISABLE_DEPRECATION_WARNINGS +# define GGANDIVA_DEPRECATED +# define GGANDIVA_DEPRECATED_FOR(function) +# define GGANDIVA_UNAVAILABLE(major, minor) +#else +# define GGANDIVA_DEPRECATED G_DEPRECATED +# define GGANDIVA_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GGANDIVA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +/** + * GGANDIVA_VERSION_1_0: + * + * You can use this macro value for compile time API version check.
+ * + * Since: 1.0.0 + */ +#define GGANDIVA_VERSION_1_0 G_ENCODE_VERSION(1, 0) + +/** + * GGANDIVA_VERSION_4_0: + * + * You can use this macro value for compile time API version check. + * + * Since: 4.0.0 + */ +#define GGANDIVA_VERSION_4_0 G_ENCODE_VERSION(4, 0) + +/** + * GGANDIVA_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GGANDIVA_VERSION_1_0. + * + * If you use any functions that is defined by newer version than + * %GGANDIVA_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * gandiva-glib/gandiva-glib.h header. + * + * Since: 1.0.0 + */ +#ifndef GGANDIVA_VERSION_MIN_REQUIRED +# define GGANDIVA_VERSION_MIN_REQUIRED \ + G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR) +#endif + +/** + * GGANDIVA_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GGANDIVA_VERSION_1_0. + * + * If you use any functions that is defined by newer version than + * %GGANDIVA_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * gandiva-glib/gandiva-glib.h header. 
+ * + * Since: 1.0.0 + */ +#ifndef GGANDIVA_VERSION_MAX_ALLOWED +# define GGANDIVA_VERSION_MAX_ALLOWED \ + G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR) +#endif + + +#define GGANDIVA_AVAILABLE_IN_ALL + +#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_4_0 +# define GGANDIVA_DEPRECATED_IN_4_0 GGANDIVA_DEPRECATED +# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) +#else +# define GGANDIVA_DEPRECATED_IN_4_0 +# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) +#endif + +#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_4_0 +# define GGANDIVA_AVAILABLE_IN_4_0 GGANDIVA_UNAVAILABLE(4, 0) +#else +# define GGANDIVA_AVAILABLE_IN_4_0 +#endif + +#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_1_0 +# define GGANDIVA_DEPRECATED_IN_1_0 GGANDIVA_DEPRECATED +# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) +#else +# define GGANDIVA_DEPRECATED_IN_1_0 +# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) +#endif + +#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_1_0 +# define GGANDIVA_AVAILABLE_IN_1_0 GGANDIVA_UNAVAILABLE(1, 0) +#else +# define GGANDIVA_AVAILABLE_IN_1_0 +#endif + +#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_0_17 +# define GGANDIVA_DEPRECATED_IN_0_17 GGANDIVA_DEPRECATED +# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) GGANDIVA_DEPRECATED_FOR(function) +#else +# define GGANDIVA_DEPRECATED_IN_0_17 +# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) +#endif + +#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_0_17 +# define GGANDIVA_AVAILABLE_IN_0_17 GGANDIVA_UNAVAILABLE(0, 17) +#else +# define GGANDIVA_AVAILABLE_IN_0_17 +#endif diff --git a/src/arrow/c_glib/meson.build b/src/arrow/c_glib/meson.build new file mode 100644 index 000000000..df04cf0a0 --- /dev/null +++ b/src/arrow/c_glib/meson.build @@ -0,0 +1,191 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +project('arrow-glib', 'c', 'cpp', + license: 'Apache-2.0', + default_options: [ + 'cpp_std=c++11', + ]) + +version = '6.0.1' +if version.endswith('-SNAPSHOT') + version_numbers = version.split('-')[0].split('.') + version_tag = version.split('-')[1] +else + version_numbers = version.split('.') + version_tag = '' +endif +version_major = version_numbers[0].to_int() +version_minor = version_numbers[1].to_int() +version_micro = version_numbers[2].to_int() + +api_version = '1.0' +so_version = version_major * 100 + version_minor +so_version_patch = version_micro +library_version = '@0@.@1@.@2@'.format(so_version, so_version_patch, 0) + +prefix = get_option('prefix') +include_dir = join_paths(prefix, get_option('includedir')) +data_dir = join_paths(prefix, get_option('datadir')) + +gnome = import('gnome') +pkgconfig = import('pkgconfig') + +base_include_directories = [ + include_directories('.') +] + +have_gi = dependency('gobject-introspection-1.0', required: false).found() + +arrow_cpp_build_dir = get_option('arrow_cpp_build_dir') +arrow_cpp_build_type = get_option('arrow_cpp_build_type') +if arrow_cpp_build_dir == '' + arrow_cpp_build_lib_dir = '' +else + arrow_cpp_build_lib_dir = join_paths(meson.source_root(), + arrow_cpp_build_dir, + arrow_cpp_build_type.to_lower()) +endif + +if 
arrow_cpp_build_lib_dir == '' + arrow = dependency('arrow') + # They are just for checking required modules are enabled. They are built into + # libarrow.so. So we don't need additional build flags for them. + dependency('arrow-compute') + dependency('arrow-csv') + dependency('arrow-filesystem') + dependency('arrow-json') + + have_arrow_orc = dependency('arrow-orc', required: false).found() + arrow_cuda = dependency('arrow-cuda', required: false) + arrow_dataset = dependency('arrow-dataset', required: false) + arrow_flight = dependency('arrow-flight', required: false) + gandiva = dependency('gandiva', required: false) + parquet = dependency('parquet', required: false) + plasma = dependency('plasma', required: false) +else + base_include_directories += [ + include_directories(join_paths(arrow_cpp_build_dir, 'src')), + include_directories('../cpp/src'), + ] + cpp_compiler = meson.get_compiler('cpp') + arrow = cpp_compiler.find_library('arrow', + dirs: [arrow_cpp_build_lib_dir]) + arrow_orc_code = ''' +#include <arrow/adapters/orc/adapter.h> + +int +main(void) +{ + arrow::adapters::orc::ORCFileReader::Open(nullptr, nullptr, nullptr); + return 0; +} + ''' + have_arrow_orc = cpp_compiler.links(arrow_orc_code, + include_directories: base_include_directories, + dependencies: [arrow]) + arrow_cuda = cpp_compiler.find_library('arrow_cuda', + dirs: [arrow_cpp_build_lib_dir], + required: false) + arrow_dataset = cpp_compiler.find_library('arrow_dataset', + dirs: [arrow_cpp_build_lib_dir], + required: false) + arrow_flight = cpp_compiler.find_library('arrow_flight', + dirs: [arrow_cpp_build_lib_dir], + required: false) + gandiva = cpp_compiler.find_library('gandiva', + dirs: [arrow_cpp_build_lib_dir], + required: false) + parquet = cpp_compiler.find_library('parquet', + dirs: [arrow_cpp_build_lib_dir], + required: false) + plasma = cpp_compiler.find_library('plasma', + dirs: [arrow_cpp_build_lib_dir], + required: false) +endif + +cxx = meson.get_compiler('cpp') +cxx_flags = [] 
+if get_option('development_mode') + if cxx.get_id() == 'msvc' + cxx_flags += ['/WX'] + else + cxx_flags += ['-Werror'] + endif +endif +if cxx.get_id() != 'msvc' + cxx_flags += ['-Wmissing-declarations'] +endif +add_project_arguments(cxx.get_supported_arguments(cxx_flags), language: 'cpp') + +subdir('arrow-glib') +if arrow_cuda.found() + subdir('arrow-cuda-glib') +endif +if arrow_dataset.found() + subdir('arrow-dataset-glib') +endif +if arrow_flight.found() + subdir('arrow-flight-glib') +endif +if gandiva.found() + subdir('gandiva-glib') +endif +if parquet.found() + subdir('parquet-glib') +endif +if plasma.found() + subdir('plasma-glib') +endif +subdir('example') + +if get_option('gtk_doc') + subdir('doc/arrow-glib') + if arrow_dataset.found() + subdir('doc/arrow-dataset-glib') + endif + if arrow_flight.found() + subdir('doc/arrow-flight-glib') + endif + if gandiva.found() + subdir('doc/gandiva-glib') + endif + if parquet.found() + subdir('doc/parquet-glib') + endif + if plasma.found() + subdir('doc/plasma-glib') + endif +endif + +install_data('../LICENSE.txt', + 'README.md', + install_dir: join_paths(data_dir, 'doc', meson.project_name())) + +run_test = find_program('test/run-test.sh') +test('unit test', + run_test, + env: [ + 'ARROW_GLIB_TYPELIB_DIR=@0@/arrow-glib'.format(meson.build_root()), + 'ARROW_CUDA_GLIB_TYPELIB_DIR=@0@/arrow-cuda-glib'.format(meson.build_root()), + 'ARROW_DATASET_GLIB_TYPELIB_DIR=@0@/arrow-dataset-glib'.format(meson.build_root()), + 'GANDIVA_GLIB_TYPELIB_DIR=@0@/gandiva-glib'.format(meson.build_root()), + 'PARQUET_GLIB_TYPELIB_DIR=@0@/parquet-glib'.format(meson.build_root()), + 'PLASMA_GLIB_TYPELIB_DIR=@0@/plasma-glib'.format(meson.build_root()), + ]) diff --git a/src/arrow/c_glib/meson_options.txt b/src/arrow/c_glib/meson_options.txt new file mode 100644 index 000000000..1479388b1 --- /dev/null +++ b/src/arrow/c_glib/meson_options.txt @@ -0,0 +1,38 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) 
under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +option('arrow_cpp_build_dir', + type: 'string', + value: '', + description: 'Use this option to build with not installed Arrow C++') + +option('arrow_cpp_build_type', + type: 'string', + value: 'release', + description: '-DCMAKE_BUILD_TYPE option value for Arrow C++') + +option('development_mode', + type: 'boolean', + value: false, + description: 'Build in development mode') + +option('gtk_doc', + type: 'boolean', + value: false, + description: 'Build document by GTK-Doc') diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-reader.cpp b/src/arrow/c_glib/parquet-glib/arrow-file-reader.cpp new file mode 100644 index 000000000..2532db202 --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-reader.cpp @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/arrow-glib.hpp> +#include <arrow-glib/internal-index.hpp> + +#include <parquet-glib/arrow-file-reader.hpp> + +#include <parquet/file_reader.h> + +G_BEGIN_DECLS + +/** + * SECTION: arrow-file-reader + * @short_description: Arrow file reader class + * @include: parquet-glib/parquet-glib.h + * + * #GParquetArrowFileReader is a class for reading Apache Parquet data + * from file and returns them as Apache Arrow data. + */ + +typedef struct GParquetArrowFileReaderPrivate_ { + parquet::arrow::FileReader *arrow_file_reader; +} GParquetArrowFileReaderPrivate; + +enum { + PROP_0, + PROP_ARROW_FILE_READER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GParquetArrowFileReader, + gparquet_arrow_file_reader, + G_TYPE_OBJECT) + +#define GPARQUET_ARROW_FILE_READER_GET_PRIVATE(obj) \ + static_cast<GParquetArrowFileReaderPrivate *>( \ + gparquet_arrow_file_reader_get_instance_private( \ + GPARQUET_ARROW_FILE_READER(obj))) + +static void +gparquet_arrow_file_reader_finalize(GObject *object) +{ + auto priv = GPARQUET_ARROW_FILE_READER_GET_PRIVATE(object); + + delete priv->arrow_file_reader; + + G_OBJECT_CLASS(gparquet_arrow_file_reader_parent_class)->finalize(object); +} + +static void +gparquet_arrow_file_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPARQUET_ARROW_FILE_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ARROW_FILE_READER: + priv->arrow_file_reader = + static_cast<parquet::arrow::FileReader *>(g_value_get_pointer(value)); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gparquet_arrow_file_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gparquet_arrow_file_reader_init(GParquetArrowFileReader *object) +{ +} + +static void +gparquet_arrow_file_reader_class_init(GParquetArrowFileReaderClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gparquet_arrow_file_reader_finalize; + gobject_class->set_property = gparquet_arrow_file_reader_set_property; + gobject_class->get_property = gparquet_arrow_file_reader_get_property; + + spec = g_param_spec_pointer("arrow-file-reader", + "ArrowFileReader", + "The raw parquet::arrow::FileReader *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ARROW_FILE_READER, spec); +} + +/** + * gparquet_arrow_file_reader_new_arrow: + * @source: Arrow source to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GParquetArrowFileReader.
+ * + * Since: 0.11.0 + */ +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, + GError **error) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(source); + auto arrow_memory_pool = arrow::default_memory_pool(); + std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader; + auto status = parquet::arrow::OpenFile(arrow_random_access_file, + arrow_memory_pool, + &parquet_arrow_file_reader); + if (garrow_error_check(error, + status, + "[parquet][arrow][file-reader][new-arrow]")) { + return gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release()); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_reader_new_path: + * @path: Path to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GParquetArrowFileReader. + * + * Since: 0.11.0 + */ +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_path(const gchar *path, + GError **error) +{ + auto arrow_memory_mapped_file = + arrow::io::MemoryMappedFile::Open(path, arrow::io::FileMode::READ); + if (!garrow::check(error, + arrow_memory_mapped_file, + "[parquet][arrow][file-reader][new-path]")) { + return NULL; + } + + std::shared_ptr<arrow::io::RandomAccessFile> arrow_random_access_file = + arrow_memory_mapped_file.ValueOrDie(); + auto arrow_memory_pool = arrow::default_memory_pool(); + std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader; + auto status = parquet::arrow::OpenFile(arrow_random_access_file, + arrow_memory_pool, + &parquet_arrow_file_reader); + if (garrow::check(error, + status, + "[parquet][arrow][file-reader][new-path]")) { + return gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release()); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_reader_read_table: + * @reader: A #GParquetArrowFileReader. + * @error: (nullable): Return location for a #GError or %NULL.
+ * + * Returns: (transfer full) (nullable): A read #GArrowTable. + * + * Since: 0.11.0 + */ +GArrowTable * +gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, + GError **error) +{ + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + std::shared_ptr<arrow::Table> arrow_table; + auto status = parquet_arrow_file_reader->ReadTable(&arrow_table); + if (garrow_error_check(error, + status, + "[parquet][arrow][file-reader][read-table]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_reader_read_row_group: + * @reader: A #GParquetArrowFileReader. + * @row_group_index: A row group index to be read. + * @column_indices: (array length=n_column_indices) (nullable): + * Column indices to be read. %NULL means that all columns are read. + * If an index is negative, the index is counted backward from the + * end of the columns. `-1` means the last column. + * @n_column_indices: The number of elements of @column_indices. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): A read #GArrowTable.
+ * + * Since: 1.0.0 + */ +GArrowTable * +gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, + gint row_group_index, + gint *column_indices, + gsize n_column_indices, + GError **error) +{ + const gchar *tag = "[parquet][arrow][file-reader][read-row-group]"; + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + std::shared_ptr<arrow::Table> arrow_table; + arrow::Status status; + if (column_indices) { + const auto n_columns = + parquet_arrow_file_reader->parquet_reader()->metadata()->num_columns(); + std::vector<int> parquet_column_indices; + for (gsize i = 0; i < n_column_indices; ++i) { + auto column_index = column_indices[i]; + if (!garrow_internal_index_adjust(column_index, n_columns)) { + garrow_error_check(error, + arrow::Status::IndexError("Out of index: " + "<0..", n_columns, ">: " + "<", column_index, ">"), + tag); + return NULL; + } + parquet_column_indices.push_back(column_index); + } + status = + parquet_arrow_file_reader->ReadRowGroup(row_group_index, + parquet_column_indices, + &arrow_table); + } else { + status = + parquet_arrow_file_reader->ReadRowGroup(row_group_index, &arrow_table); + } + if (garrow_error_check(error, + status, + tag)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_reader_get_schema: + * @reader: A #GParquetArrowFileReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): A got #GArrowSchema.
+ * + * Since: 0.12.0 + */ +GArrowSchema * +gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, + GError **error) +{ + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + + std::shared_ptr<arrow::Schema> arrow_schema; + auto status = parquet_arrow_file_reader->GetSchema(&arrow_schema); + if (garrow_error_check(error, + status, + "[parquet][arrow][file-reader][get-schema]")) { + return garrow_schema_new_raw(&arrow_schema); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_reader_read_column_data: + * @reader: A #GParquetArrowFileReader. + * @i: The index of the column to be read. + * If an index is negative, the index is counted backward from the + * end of the columns. `-1` means the last column. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): A read #GArrowChunkedArray. + * + * Since: 0.15.0 + */ +GArrowChunkedArray * +gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, + gint i, + GError **error) +{ + const auto tag = "[parquet][arrow][file-reader][read-column-data]"; + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + + const auto n_columns = + parquet_arrow_file_reader->parquet_reader()->metadata()->num_columns(); + if (!garrow_internal_index_adjust(i, n_columns)) { + garrow_error_check(error, + arrow::Status::IndexError("Out of index: " + "<0..", n_columns, ">: " + "<", i, ">"), + tag); + return NULL; + } + + std::shared_ptr<arrow::ChunkedArray> arrow_chunked_array; + auto status = + parquet_arrow_file_reader->ReadColumn(i, &arrow_chunked_array); + if (!garrow_error_check(error, status, tag)) { + return NULL; + } + + return garrow_chunked_array_new_raw(&arrow_chunked_array); +} + +/** + * gparquet_arrow_file_reader_get_n_row_groups: + * @reader: A #GParquetArrowFileReader. + * + * Returns: The number of row groups.
+ * + * Since: 0.11.0 + */ +gint +gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader) +{ + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + return parquet_arrow_file_reader->num_row_groups(); +} + +/** + * gparquet_arrow_file_reader_get_n_rows: + * @reader: A #GParquetArrowFileReader. + * + * Returns: The number of rows. + * + * Since: 6.0.0 + */ +gint64 +gparquet_arrow_file_reader_get_n_rows(GParquetArrowFileReader *reader) +{ + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + return parquet_arrow_file_reader->parquet_reader()->metadata()->num_rows(); +} + +/** + * gparquet_arrow_file_reader_set_use_threads: + * @reader: A #GParquetArrowFileReader. + * @use_threads: Whether to use threads or not. + * + * Since: 0.11.0 + */ +void +gparquet_arrow_file_reader_set_use_threads(GParquetArrowFileReader *reader, + gboolean use_threads) +{ + auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); + parquet_arrow_file_reader->set_use_threads(use_threads); +} + +G_END_DECLS + +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_raw(parquet::arrow::FileReader *parquet_arrow_file_reader) +{ + auto arrow_file_reader = + GPARQUET_ARROW_FILE_READER(g_object_new(GPARQUET_TYPE_ARROW_FILE_READER, + "arrow-file-reader", parquet_arrow_file_reader, + NULL)); + return arrow_file_reader; +} + +parquet::arrow::FileReader * +gparquet_arrow_file_reader_get_raw(GParquetArrowFileReader *arrow_file_reader) +{ + auto priv = GPARQUET_ARROW_FILE_READER_GET_PRIVATE(arrow_file_reader); + return priv->arrow_file_reader; +} diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-reader.h b/src/arrow/c_glib/parquet-glib/arrow-file-reader.h new file mode 100644 index 000000000..abea06c57 --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-reader.h @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +#define GPARQUET_TYPE_ARROW_FILE_READER (gparquet_arrow_file_reader_get_type()) +G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileReader, + gparquet_arrow_file_reader, + GPARQUET, + ARROW_FILE_READER, + GObject) +struct _GParquetArrowFileReaderClass +{ + GObjectClass parent_class; +}; + +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, + GError **error); +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_path(const gchar *path, + GError **error); + +GArrowTable * +gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, + GError **error); + +GARROW_AVAILABLE_IN_1_0 +GArrowTable * +gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, + gint row_group_index, + gint *column_indices, + gsize n_column_indices, + GError **error); + +GArrowSchema * +gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, + GError **error); + +GArrowChunkedArray * +gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, + gint i, + GError **error); + +gint +gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader); + +GARROW_AVAILABLE_IN_6_0 +gint64 
+gparquet_arrow_file_reader_get_n_rows(GParquetArrowFileReader *reader); + +void +gparquet_arrow_file_reader_set_use_threads(GParquetArrowFileReader *reader, + gboolean use_threads); + +G_END_DECLS diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-reader.hpp b/src/arrow/c_glib/parquet-glib/arrow-file-reader.hpp new file mode 100644 index 000000000..172dcccb0 --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-reader.hpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <parquet/arrow/reader.h> + +#include <parquet-glib/arrow-file-reader.h> + +GParquetArrowFileReader * +gparquet_arrow_file_reader_new_raw(parquet::arrow::FileReader *parquet_arrow_file_reader); +parquet::arrow::FileReader * +gparquet_arrow_file_reader_get_raw(GParquetArrowFileReader *arrow_file_reader); diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-writer.cpp b/src/arrow/c_glib/parquet-glib/arrow-file-writer.cpp new file mode 100644 index 000000000..c53bb94ce --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-writer.cpp @@ -0,0 +1,579 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/arrow-glib.hpp> + +#include <parquet-glib/arrow-file-writer.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: arrow-file-writer + * @short_description: Arrow file writer class + * @include: parquet-glib/parquet-glib.h + * + * #GParquetWriterProperties is a class for the writer properties. + * + * #GParquetArrowFileWriter is a class for writing Apache Arrow data to + * file as Apache Parquet format.
+ */ + +typedef struct GParquetWriterPropertiesPrivate_ { + std::shared_ptr<parquet::WriterProperties> properties; + parquet::WriterProperties::Builder *builder; + gboolean changed; +} GParquetWriterPropertiesPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GParquetWriterProperties, + gparquet_writer_properties, + G_TYPE_OBJECT) + +#define GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(object) \ + static_cast<GParquetWriterPropertiesPrivate *>( \ + gparquet_writer_properties_get_instance_private( \ + GPARQUET_WRITER_PROPERTIES(object))) + +static void +gparquet_writer_properties_finalize(GObject *object) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(object); + + priv->properties.~shared_ptr(); + delete priv->builder; + + G_OBJECT_CLASS(gparquet_writer_properties_parent_class)->finalize(object); +} + +static void +gparquet_writer_properties_init(GParquetWriterProperties *object) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(object); + new(&priv->properties) std::shared_ptr<parquet::WriterProperties>; + priv->builder = new parquet::WriterProperties::Builder(); + priv->changed = TRUE; +} + +static void +gparquet_writer_properties_class_init(GParquetWriterPropertiesClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gparquet_writer_properties_finalize; +} + +/** + * gparquet_writer_properties_new: + * + * Returns: A newly created #GParquetWriterProperties. + * + * Since: 0.17.0 + */ +GParquetWriterProperties * +gparquet_writer_properties_new(void) +{ + auto writer_properties = g_object_new(GPARQUET_TYPE_WRITER_PROPERTIES, + NULL); + return GPARQUET_WRITER_PROPERTIES(writer_properties); +} + +/** + * gparquet_writer_properties_set_compression: + * @properties: A #GParquetWriterProperties. + * @compression_type: A #GArrowCompressionType. + * @path: (nullable): The column path as dot string.
+ * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_set_compression(GParquetWriterProperties *properties, + GArrowCompressionType compression_type, + const gchar *path) +{ + auto arrow_compression_type = garrow_compression_type_to_raw(compression_type); + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + if (path) { + priv->builder->compression(path, arrow_compression_type); + } else { + priv->builder->compression(arrow_compression_type); + } + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_get_compression_path: + * @properties: A #GParquetWriterProperties. + * @path: The path as dot string. + * + * Returns: The compression type of #GParquetWriterProperties. + * + * Since: 0.17.0 + */ +GArrowCompressionType +gparquet_writer_properties_get_compression_path(GParquetWriterProperties *properties, + const gchar *path) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + auto parquet_column_path = parquet::schema::ColumnPath::FromDotString(path); + auto arrow_compression = parquet_properties->compression(parquet_column_path); + return garrow_compression_type_from_raw(arrow_compression); +} + +/** + * gparquet_writer_properties_enable_dictionary: + * @properties: A #GParquetWriterProperties. + * @path: (nullable): The column path as dot string. + * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_enable_dictionary(GParquetWriterProperties *properties, + const gchar *path) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + if (path) { + priv->builder->enable_dictionary(path); + } else { + priv->builder->enable_dictionary(); + } + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_disable_dictionary: + * @properties: A #GParquetWriterProperties. + * @path: (nullable): The column path as dot string. 
+ * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_disable_dictionary(GParquetWriterProperties *properties, + const gchar *path) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + if (path) { + priv->builder->disable_dictionary(path); + } else { + priv->builder->disable_dictionary(); + } + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_is_dictionary_enabled: + * @properties: A #GParquetWriterProperties. + * @path: The path as dot string. + * + * Returns: %TRUE on dictionary enabled, %FALSE on dictionary disabled. + * + * Since: 0.17.0 + */ +gboolean +gparquet_writer_properties_is_dictionary_enabled(GParquetWriterProperties *properties, + const gchar *path) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + auto parquet_column_path = parquet::schema::ColumnPath::FromDotString(path); + return parquet_properties->dictionary_enabled(parquet_column_path); +} + +/** + * gparquet_writer_properties_set_dictionary_page_size_limit: + * @properties: A #GParquetWriterProperties. + * @limit: The dictionary page size limit. + * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_set_dictionary_page_size_limit(GParquetWriterProperties *properties, + gint64 limit) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + priv->builder->dictionary_pagesize_limit(limit); + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_get_dictionary_page_size_limit: + * @properties: A #GParquetWriterProperties. + * + * Returns: The dictionary page size limit. + * + * Since: 0.17.0 + */ +gint64 +gparquet_writer_properties_get_dictionary_page_size_limit(GParquetWriterProperties *properties) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + return parquet_properties->dictionary_pagesize_limit(); +} + +/** + * gparquet_writer_properties_set_batch_size: + * @properties: A #GParquetWriterProperties. + * @batch_size: The batch size. 
+ * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_set_batch_size(GParquetWriterProperties *properties, + gint64 batch_size) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + priv->builder->write_batch_size(batch_size); + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_get_batch_size: + * @properties: A #GParquetWriterProperties. + * + * Returns: The batch size. + * + * Since: 0.17.0 + */ +gint64 +gparquet_writer_properties_get_batch_size(GParquetWriterProperties *properties) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + return parquet_properties->write_batch_size(); +} + +/** + * gparquet_writer_properties_set_max_row_group_length: + * @properties: A #GParquetWriterProperties. + * @length: The max row group length. + * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_set_max_row_group_length(GParquetWriterProperties *properties, + gint64 length) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + priv->builder->max_row_group_length(length); + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_get_max_row_group_length: + * @properties: A #GParquetWriterProperties. + * + * Returns: The max row group length. + * + * Since: 0.17.0 + */ +gint64 +gparquet_writer_properties_get_max_row_group_length(GParquetWriterProperties *properties) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + return parquet_properties->max_row_group_length(); +} + +/** + * gparquet_writer_properties_set_data_page_size: + * @properties: A #GParquetWriterProperties. + * @data_page_size: The data page size. 
+ * + * Since: 0.17.0 + */ +void +gparquet_writer_properties_set_data_page_size(GParquetWriterProperties *properties, + gint64 data_page_size) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + priv->builder->data_pagesize(data_page_size); + priv->changed = TRUE; +} + +/** + * gparquet_writer_properties_get_data_page_size: + * @properties: A #GParquetWriterProperties. + * + * Returns: The data page size. + * + * Since: 0.17.0 + */ +gint64 +gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properties) +{ + auto parquet_properties = gparquet_writer_properties_get_raw(properties); + return parquet_properties->data_pagesize(); +} + + +typedef struct GParquetArrowFileWriterPrivate_ { + parquet::arrow::FileWriter *arrow_file_writer; +} GParquetArrowFileWriterPrivate; + +enum { + PROP_0, + PROP_ARROW_FILE_WRITER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GParquetArrowFileWriter, + gparquet_arrow_file_writer, + G_TYPE_OBJECT) + +#define GPARQUET_ARROW_FILE_WRITER_GET_PRIVATE(obj) \ + static_cast<GParquetArrowFileWriterPrivate *>( \ + gparquet_arrow_file_writer_get_instance_private( \ + GPARQUET_ARROW_FILE_WRITER(obj))) + +static void +gparquet_arrow_file_writer_finalize(GObject *object) +{ + auto priv = GPARQUET_ARROW_FILE_WRITER_GET_PRIVATE(object); + + delete priv->arrow_file_writer; + + G_OBJECT_CLASS(gparquet_arrow_file_writer_parent_class)->finalize(object); +} + +static void +gparquet_arrow_file_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPARQUET_ARROW_FILE_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ARROW_FILE_WRITER: + priv->arrow_file_writer = + static_cast<parquet::arrow::FileWriter *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gparquet_arrow_file_writer_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + 
switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gparquet_arrow_file_writer_init(GParquetArrowFileWriter *object) +{ +} + +static void +gparquet_arrow_file_writer_class_init(GParquetArrowFileWriterClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gparquet_arrow_file_writer_finalize; + gobject_class->set_property = gparquet_arrow_file_writer_set_property; + gobject_class->get_property = gparquet_arrow_file_writer_get_property; + + spec = g_param_spec_pointer("arrow-file-writer", + "ArrowFileWriter", + "The raw std::shared<parquet::arrow::FileWriter> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ARROW_FILE_WRITER, spec); +} + +/** + * gparquet_arrow_file_writer_new_arrow: + * @schema: Arrow schema for written data. + * @sink: Arrow output stream to be written. + * @writer_properties: (nullable): A #GParquetWriterProperties. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GParquetArrowFileWriter.
+ * + * Since: 0.11.0 + */ +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, + GArrowOutputStream *sink, + GParquetWriterProperties *writer_properties, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema).get(); + auto arrow_output_stream = garrow_output_stream_get_raw(sink); + auto arrow_memory_pool = arrow::default_memory_pool(); + std::unique_ptr<parquet::arrow::FileWriter> parquet_arrow_file_writer; + arrow::Status status; + if (writer_properties) { + auto parquet_writer_properties = gparquet_writer_properties_get_raw(writer_properties); + status = parquet::arrow::FileWriter::Open(*arrow_schema, + arrow_memory_pool, + arrow_output_stream, + parquet_writer_properties, + &parquet_arrow_file_writer); + } else { + auto parquet_writer_properties = parquet::default_writer_properties(); + status = parquet::arrow::FileWriter::Open(*arrow_schema, + arrow_memory_pool, + arrow_output_stream, + parquet_writer_properties, + &parquet_arrow_file_writer); + } + if (garrow_error_check(error, + status, + "[parquet][arrow][file-writer][new-arrow]")) { + return gparquet_arrow_file_writer_new_raw(parquet_arrow_file_writer.release()); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_writer_new_path: + * @schema: Arrow schema for written data. + * @path: Path to be written. + * @writer_properties: (nullable): A #GParquetWriterProperties. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GParquetArrowFileWriter.
+ * + * Since: 0.11.0 + */ +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_path(GArrowSchema *schema, + const gchar *path, + GParquetWriterProperties *writer_properties, + GError **error) +{ + auto arrow_file_output_stream = + arrow::io::FileOutputStream::Open(path, false); + if (!garrow::check(error, + arrow_file_output_stream, + "[parquet][arrow][file-writer][new-path]")) { + return NULL; + } + + auto arrow_schema = garrow_schema_get_raw(schema).get(); + std::shared_ptr<arrow::io::OutputStream> arrow_output_stream = + arrow_file_output_stream.ValueOrDie(); + auto arrow_memory_pool = arrow::default_memory_pool(); + std::unique_ptr<parquet::arrow::FileWriter> parquet_arrow_file_writer; + arrow::Status status; + if (writer_properties) { + auto parquet_writer_properties = gparquet_writer_properties_get_raw(writer_properties); + status = parquet::arrow::FileWriter::Open(*arrow_schema, + arrow_memory_pool, + arrow_output_stream, + parquet_writer_properties, + &parquet_arrow_file_writer); + } else { + auto parquet_writer_properties = parquet::default_writer_properties(); + status = parquet::arrow::FileWriter::Open(*arrow_schema, + arrow_memory_pool, + arrow_output_stream, + parquet_writer_properties, + &parquet_arrow_file_writer); + } + if (garrow::check(error, + status, + "[parquet][arrow][file-writer][new-path]")) { + return gparquet_arrow_file_writer_new_raw(parquet_arrow_file_writer.release()); + } else { + return NULL; + } +} + +/** + * gparquet_arrow_file_writer_write_table: + * @writer: A #GParquetArrowFileWriter. + * @table: A table to be written. + * @chunk_size: The max number of rows in a row group. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error.
+ * + * Since: 0.11.0 + */ +gboolean +gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, + GArrowTable *table, + guint64 chunk_size, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_table = garrow_table_get_raw(table).get(); + auto status = parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size); + return garrow_error_check(error, + status, + "[parquet][arrow][file-writer][write-table]"); +} + +/** + * gparquet_arrow_file_writer_close: + * @writer: A #GParquetArrowFileWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.11.0 + */ +gboolean +gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto status = parquet_arrow_file_writer->Close(); + return garrow_error_check(error, + status, + "[parquet][arrow][file-writer][close]"); +} + + +G_END_DECLS + +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_raw(parquet::arrow::FileWriter *parquet_arrow_file_writer) +{ + auto arrow_file_writer = + GPARQUET_ARROW_FILE_WRITER(g_object_new(GPARQUET_TYPE_ARROW_FILE_WRITER, + "arrow-file-writer", parquet_arrow_file_writer, + NULL)); + return arrow_file_writer; +} + +parquet::arrow::FileWriter * +gparquet_arrow_file_writer_get_raw(GParquetArrowFileWriter *arrow_file_writer) +{ + auto priv = GPARQUET_ARROW_FILE_WRITER_GET_PRIVATE(arrow_file_writer); + return priv->arrow_file_writer; +} + +std::shared_ptr<parquet::WriterProperties> +gparquet_writer_properties_get_raw(GParquetWriterProperties *properties) +{ + auto priv = GPARQUET_WRITER_PROPERTIES_GET_PRIVATE(properties); + if (priv->changed) { + priv->properties = priv->builder->build(); + priv->changed = FALSE; + } + return priv->properties; +} diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-writer.h
b/src/arrow/c_glib/parquet-glib/arrow-file-writer.h new file mode 100644 index 000000000..67083a074 --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-writer.h @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <arrow-glib/arrow-glib.h> + +G_BEGIN_DECLS + +#define GPARQUET_TYPE_WRITER_PROPERTIES \ + (gparquet_writer_properties_get_type()) +G_DECLARE_DERIVABLE_TYPE(GParquetWriterProperties, + gparquet_writer_properties, + GPARQUET, + WRITER_PROPERTIES, + GObject) +struct _GParquetWriterPropertiesClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_17 +GParquetWriterProperties *gparquet_writer_properties_new(void); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_set_compression(GParquetWriterProperties *properties, + GArrowCompressionType compression_type, + const gchar *path); +GARROW_AVAILABLE_IN_0_17 +GArrowCompressionType +gparquet_writer_properties_get_compression_path(GParquetWriterProperties *properties, + const gchar *path); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_enable_dictionary(GParquetWriterProperties *properties, + const gchar *path); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_disable_dictionary(GParquetWriterProperties *properties, + const gchar *path); +GARROW_AVAILABLE_IN_0_17 +gboolean +gparquet_writer_properties_is_dictionary_enabled(GParquetWriterProperties *properties, + const gchar *path); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_set_dictionary_page_size_limit(GParquetWriterProperties *properties, + gint64 limit); +GARROW_AVAILABLE_IN_0_17 +gint64 +gparquet_writer_properties_get_dictionary_page_size_limit(GParquetWriterProperties *properties); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_set_batch_size(GParquetWriterProperties *properties, + gint64 batch_size); +GARROW_AVAILABLE_IN_0_17 +gint64 +gparquet_writer_properties_get_batch_size(GParquetWriterProperties *properties); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_set_max_row_group_length(GParquetWriterProperties *properties, + gint64 length); +GARROW_AVAILABLE_IN_0_17 +gint64 +gparquet_writer_properties_get_max_row_group_length(GParquetWriterProperties 
*properties); +GARROW_AVAILABLE_IN_0_17 +void +gparquet_writer_properties_set_data_page_size(GParquetWriterProperties *properties, + gint64 data_page_size); +GARROW_AVAILABLE_IN_0_17 +gint64 +gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properties); + + +#define GPARQUET_TYPE_ARROW_FILE_WRITER (gparquet_arrow_file_writer_get_type()) +G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileWriter, + gparquet_arrow_file_writer, + GPARQUET, + ARROW_FILE_WRITER, + GObject) +struct _GParquetArrowFileWriterClass +{ + GObjectClass parent_class; +}; + +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, + GArrowOutputStream *sink, + GParquetWriterProperties *writer_properties, + GError **error); +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_path(GArrowSchema *schema, + const gchar *path, + GParquetWriterProperties *writer_properties, + GError **error); + +gboolean +gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, + GArrowTable *table, + guint64 chunk_size, + GError **error); + +gboolean +gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/parquet-glib/arrow-file-writer.hpp b/src/arrow/c_glib/parquet-glib/arrow-file-writer.hpp new file mode 100644 index 000000000..69fecf1be --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/arrow-file-writer.hpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <memory> + +#include <parquet/arrow/writer.h> + +#include <parquet-glib/arrow-file-writer.h> + +GParquetArrowFileWriter * +gparquet_arrow_file_writer_new_raw(parquet::arrow::FileWriter *parquet_arrow_file_writer); +parquet::arrow::FileWriter * +gparquet_arrow_file_writer_get_raw(GParquetArrowFileWriter *arrow_file_writer); +std::shared_ptr<parquet::WriterProperties> +gparquet_writer_properties_get_raw(GParquetWriterProperties *properties); diff --git a/src/arrow/c_glib/parquet-glib/meson.build b/src/arrow/c_glib/parquet-glib/meson.build new file mode 100644 index 000000000..73cd9e45c --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/meson.build @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +project_name = 'parquet-glib' + +sources = files( + 'arrow-file-reader.cpp', + 'arrow-file-writer.cpp', +) + +c_headers = files( + 'arrow-file-reader.h', + 'arrow-file-writer.h', + 'parquet-glib.h', +) + +cpp_headers = files( + 'arrow-file-reader.hpp', + 'arrow-file-writer.hpp', + 'parquet-glib.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: project_name) + + +dependencies = [ + arrow, + parquet, + arrow_glib, +] +libparquet_glib = library('parquet-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + soversion: so_version, + version: library_version) +parquet_glib = declare_dependency(link_with: libparquet_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(libparquet_glib, + filebase: project_name, + name: 'Apache Parquet GLib', + description: 'C API for Apache Parquet based on GLib', + version: version, + requires: ['parquet', 'arrow-glib']) + +if have_gi + gnome.generate_gir(libparquet_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + sources: sources + c_headers, + namespace: 'Parquet', + nsversion: api_version, + identifier_prefix: 'GParquet', + symbol_prefix: 'gparquet', + export_packages: 'parquet-glib', + includes: [ + 'Arrow-1.0', + ], + install: true, + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ]) +endif diff --git a/src/arrow/c_glib/parquet-glib/parquet-glib.h b/src/arrow/c_glib/parquet-glib/parquet-glib.h new file mode 100644 index 000000000..6ae0f7e8f --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/parquet-glib.h @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <parquet-glib/arrow-file-reader.h> +#include <parquet-glib/arrow-file-writer.h> diff --git a/src/arrow/c_glib/parquet-glib/parquet-glib.hpp b/src/arrow/c_glib/parquet-glib/parquet-glib.hpp new file mode 100644 index 000000000..988e715a2 --- /dev/null +++ b/src/arrow/c_glib/parquet-glib/parquet-glib.hpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <parquet-glib/parquet-glib.h> + +#include <parquet-glib/arrow-file-reader.hpp> +#include <parquet-glib/arrow-file-writer.hpp> diff --git a/src/arrow/c_glib/plasma-glib/client.cpp b/src/arrow/c_glib/plasma-glib/client.cpp new file mode 100644 index 000000000..26476f4d6 --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/client.cpp @@ -0,0 +1,612 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/buffer.hpp> +#include <arrow-glib/error.hpp> + +#ifdef HAVE_ARROW_CUDA +# include <arrow-cuda-glib/cuda.hpp> +#endif + +#include <plasma-glib/client.hpp> +#include <plasma-glib/object.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: client + * @section_id: client-classes + * @title: Client related classes + * @include: plasma-glib/plasma-glib.h + * + * #GPlasmaClientOptions is a class for customizing plasma store + * connection. + * + * #GPlasmaClientCreateOptions is a class for customizing object creation. + * + * #GPlasmaClient is a class for an interface with a plasma store. 
+ * + * Since: 0.12.0 + */ + +/* Instance-private state of #GPlasmaClientOptions: the retry count used on connect. */ +typedef struct GPlasmaClientOptionsPrivate_ { + gint n_retries; +} GPlasmaClientOptionsPrivate; + +enum { + PROP_N_RETRIES = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClientOptions, + gplasma_client_options, + G_TYPE_OBJECT) + +#define GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object) \ + static_cast<GPlasmaClientOptionsPrivate *>( \ + gplasma_client_options_get_instance_private( \ + GPLASMA_CLIENT_OPTIONS(object))) + +static void +gplasma_client_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_N_RETRIES: + priv->n_retries = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_N_RETRIES: + g_value_set_int(value, priv->n_retries); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_options_init(GPlasmaClientOptions *object) +{ +} + +static void +gplasma_client_options_class_init(GPlasmaClientOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = gplasma_client_options_set_property; + gobject_class->get_property = gplasma_client_options_get_property; + + GParamSpec *spec; + spec = g_param_spec_int("n-retries", + "N retries", + "The number of retries to connect plasma store. " + "-1 means that the system default value is used.", + -1, + G_MAXINT, + -1, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT)); + g_object_class_install_property(gobject_class, PROP_N_RETRIES, spec); +} + +/** + * gplasma_client_options_new: + * + * Returns: A newly created #GPlasmaClientOptions.
+ * + * Since: 0.12.0 + */ +GPlasmaClientOptions * +gplasma_client_options_new(void) +{ + auto options = g_object_new(GPLASMA_TYPE_CLIENT_OPTIONS, + NULL); + return GPLASMA_CLIENT_OPTIONS(options); +} + +/** + * gplasma_client_options_set_n_retries: + * @options: A #GPlasmaClientOptions. + * @n_retries: The number of retries on connect. + * + * Since: 0.12.0 + */ +void +gplasma_client_options_set_n_retries(GPlasmaClientOptions *options, + gint n_retries) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options); + priv->n_retries = n_retries; +} + +/** + * gplasma_client_options_get_n_retries: + * @options: A #GPlasmaClientOptions. + * + * Returns: The number of retries on connect. + * + * Since: 0.12.0 + */ +gint +gplasma_client_options_get_n_retries(GPlasmaClientOptions *options) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options); + return priv->n_retries; +} + + +typedef struct GPlasmaClientCreateOptionsPrivate_ { + guint8 *metadata; + gsize metadata_size; + gint gpu_device; +} GPlasmaClientCreateOptionsPrivate; + +enum { + PROP_GPU_DEVICE = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClientCreateOptions, + gplasma_client_create_options, + G_TYPE_OBJECT) + +#define GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object) \ + static_cast<GPlasmaClientCreateOptionsPrivate *>( \ + gplasma_client_create_options_get_instance_private( \ + GPLASMA_CLIENT_CREATE_OPTIONS(object))) + +static void +gplasma_client_create_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_GPU_DEVICE: + priv->gpu_device = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_create_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(object); + +
switch (prop_id) { + case PROP_GPU_DEVICE: + g_value_set_int(value, priv->gpu_device); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_create_options_init(GPlasmaClientCreateOptions *object) +{ +} + +static void +gplasma_client_create_options_class_init(GPlasmaClientCreateOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = gplasma_client_create_options_set_property; + gobject_class->get_property = gplasma_client_create_options_get_property; + + GParamSpec *spec; + spec = g_param_spec_int("gpu-device", + "GPU device", + "The GPU device number. -1 means GPU isn't used.", + -1, + G_MAXINT, + -1, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT)); + g_object_class_install_property(gobject_class, PROP_GPU_DEVICE, spec); +} + +/** + * gplasma_client_create_options_new: + * + * Returns: A newly created #GPlasmaClientCreateOptions. + * + * Since: 0.12.0 + */ +GPlasmaClientCreateOptions * +gplasma_client_create_options_new(void) +{ + auto options = g_object_new(GPLASMA_TYPE_CLIENT_CREATE_OPTIONS, + NULL); + return GPLASMA_CLIENT_CREATE_OPTIONS(options); +} + +#if !GLIB_CHECK_VERSION(2, 68, 0) +# define g_memdup2(memory, byte_size) g_memdup(memory, byte_size) +#endif + +/** + * gplasma_client_create_options_set_metadata: + * @options: A #GPlasmaClientCreateOptions. + * @metadata: (nullable) (array length=size): The metadata of a created object. + * @size: The number of bytes of the metadata. 
+ * + * Since: 0.12.0 + */ +void +gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options, + const guint8 *metadata, + gsize size) +{ + auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options); + if (priv->metadata) { + g_free(priv->metadata); + } + priv->metadata = static_cast<guint8 *>(g_memdup2(metadata, size)); + priv->metadata_size = size; +} + +/** + * gplasma_client_create_options_get_metadata: + * @options: A #GPlasmaClientCreateOptions. + * @size: (nullable) (out): The number of bytes of the metadata. + * + * Returns: (nullable) (array length=size): The metadata of a created object. + * + * Since: 0.12.0 + */ +const guint8 * +gplasma_client_create_options_get_metadata(GPlasmaClientCreateOptions *options, + gsize *size) +{ + auto priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options); + if (size) { + *size = priv->metadata_size; + } + return priv->metadata; +} + + +typedef struct GPlasmaClientPrivate_ { + plasma::PlasmaClient *client; + bool disconnected; +} GPlasmaClientPrivate; + +enum { + PROP_CLIENT = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClient, + gplasma_client, + G_TYPE_OBJECT) + +#define GPLASMA_CLIENT_GET_PRIVATE(object) \ + static_cast<GPlasmaClientPrivate *>( \ + gplasma_client_get_instance_private( \ + GPLASMA_CLIENT(object))) + +static void +gplasma_client_finalize(GObject *object) +{ + auto priv = GPLASMA_CLIENT_GET_PRIVATE(object); + + if (!priv->disconnected) { + auto status = priv->client->Disconnect(); + if (!status.ok()) { + g_warning("[plasma][client][finalize] Failed to disconnect: %s", + status.ToString().c_str()); + } + } + delete priv->client; + + G_OBJECT_CLASS(gplasma_client_parent_class)->finalize(object); +} + +static void +gplasma_client_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + priv->client = + static_cast<plasma::PlasmaClient 
*>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_init(GPlasmaClient *object) +{ +} + +static void +gplasma_client_class_init(GPlasmaClientClass *klass) +{ + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gplasma_client_finalize; + gobject_class->set_property = gplasma_client_set_property; + + spec = g_param_spec_pointer("client", + "Client", + "The raw plasma::PlasmaClient *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CLIENT, spec); +} + +/** + * gplasma_client_new: + * @store_socket_name: The name of the UNIX domain socket. + * @options: (nullable): The options to custom how to connect to plasma store. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GPlasmaClient on success, + * %NULL on error. + * + * Since: 0.12.0 + */ +GPlasmaClient * +gplasma_client_new(const gchar *store_socket_name, + GPlasmaClientOptions *options, + GError **error) +{ + auto plasma_client = new plasma::PlasmaClient(); + int n_retries = -1; + if (options) { + n_retries = gplasma_client_options_get_n_retries(options); + } + auto status = plasma_client->Connect(store_socket_name, "", 0, n_retries); + if (garrow_error_check(error, status, "[plasma][client][new]")) { + return gplasma_client_new_raw(plasma_client); + } else { + return NULL; + } +} + +/** + * gplasma_client_create: + * @client: A #GPlasmaClient. + * @id: The ID for a newly created object. + * @data_size: The number of bytes of data for a newly created object. + * @options: (nullable): The option for creating an object. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A newly created #GPlasmaCreatedObject + * on success, %NULL on error. 
+ * + * Since: 0.12.0 + */ +GPlasmaCreatedObject * +gplasma_client_create(GPlasmaClient *client, + GPlasmaObjectID *id, + gsize data_size, + GPlasmaClientCreateOptions *options, + GError **error) +{ + const auto context = "[plasma][client][create]"; + auto plasma_client = gplasma_client_get_raw(client); + auto plasma_id = gplasma_object_id_get_raw(id); + const uint8_t *raw_metadata = nullptr; + int64_t raw_metadata_size = 0; + int device_number = 0; + if (options) { + auto options_priv = GPLASMA_CLIENT_CREATE_OPTIONS_GET_PRIVATE(options); + raw_metadata = options_priv->metadata; + raw_metadata_size = options_priv->metadata_size; + if (options_priv->gpu_device >= 0) { +#ifndef HAVE_ARROW_CUDA + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s Arrow CUDA GLib is needed to use GPU", + context); + return NULL; +#endif + device_number = options_priv->gpu_device + 1; + } + } + std::shared_ptr<arrow::Buffer> plasma_data; + auto status = plasma_client->Create(plasma_id, + data_size, + raw_metadata, + raw_metadata_size, + &plasma_data, + device_number); + if (!garrow_error_check(error, status, context)) { + return NULL; + } + + GArrowBuffer *data = nullptr; + if (device_number == 0) { + auto plasma_mutable_data = + std::static_pointer_cast<arrow::MutableBuffer>(plasma_data); + data = GARROW_BUFFER(garrow_mutable_buffer_new_raw(&plasma_mutable_data)); +#ifdef HAVE_ARROW_CUDA + } else { + auto plasma_cuda_data = + std::static_pointer_cast<arrow::cuda::CudaBuffer>(plasma_data); + data = GARROW_BUFFER(garrow_cuda_buffer_new_raw(&plasma_cuda_data)); +#endif + } + std::shared_ptr<arrow::Buffer> plasma_metadata; + GArrowBuffer *metadata = nullptr; + if (raw_metadata_size > 0) { + plasma_metadata = + std::make_shared<arrow::Buffer>(raw_metadata, raw_metadata_size); + metadata = garrow_buffer_new_raw(&plasma_metadata); + } + return gplasma_created_object_new_raw(client, + id, + &plasma_data, + data, + metadata ? 
&plasma_metadata : nullptr, + metadata, + device_number - 1); +} + +/** + * gplasma_client_refer_object: + * @client: A #GPlasmaClient. + * @id: The ID of the target object. + * @timeout_ms: The timeout in milliseconds. -1 means no timeout. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): A found #GPlasmaReferredObject + * on success, %NULL on error. + * + * Since: 0.12.0 + */ +GPlasmaReferredObject * +gplasma_client_refer_object(GPlasmaClient *client, + GPlasmaObjectID *id, + gint64 timeout_ms, + GError **error) +{ + const auto context = "[plasma][client][refer-object]"; + auto plasma_client = gplasma_client_get_raw(client); + auto plasma_id = gplasma_object_id_get_raw(id); + std::vector<plasma::ObjectID> plasma_ids; + plasma_ids.push_back(plasma_id); + std::vector<plasma::ObjectBuffer> plasma_object_buffers; + auto status = plasma_client->Get(plasma_ids, + timeout_ms, + &plasma_object_buffers); + if (!garrow_error_check(error, status, context)) { + return NULL; + } + + auto plasma_object_buffer = plasma_object_buffers[0]; + auto plasma_data = plasma_object_buffer.data; + auto plasma_metadata = plasma_object_buffer.metadata; + GArrowBuffer *data = nullptr; + GArrowBuffer *metadata = nullptr; + if (plasma_object_buffer.device_num == 0) { + data = garrow_buffer_new_raw(&plasma_data); + metadata = garrow_buffer_new_raw(&plasma_metadata); + } else { +#ifdef HAVE_ARROW_CUDA + auto plasma_cuda_data = arrow::cuda::CudaBuffer::FromBuffer(plasma_data); + if (!garrow::check(error, plasma_cuda_data, context)) { + return NULL; + } + auto plasma_cuda_metadata = + arrow::cuda::CudaBuffer::FromBuffer(plasma_metadata); + if (!garrow::check(error, plasma_cuda_metadata, context)) { + return NULL; + } + + data = GARROW_BUFFER(garrow_cuda_buffer_new_raw(&(*plasma_cuda_data))); + metadata = + GARROW_BUFFER(garrow_cuda_buffer_new_raw(&(*plasma_cuda_metadata))); +#else + g_set_error(error, + GARROW_ERROR, + 
GARROW_ERROR_INVALID, + "%s Arrow CUDA GLib is needed to use GPU", + context); + return NULL; +#endif + } + return gplasma_referred_object_new_raw(client, + id, + &plasma_data, + data, + &plasma_metadata, + metadata, + plasma_object_buffer.device_num - 1); +} + +/** + * gplasma_client_disconnect: + * @client: A #GPlasmaClient. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +gplasma_client_disconnect(GPlasmaClient *client, + GError **error) +{ + auto priv = GPLASMA_CLIENT_GET_PRIVATE(client); + auto status = priv->client->Disconnect(); + if (garrow_error_check(error, status, "[plasma][client][disconnect]")) { + priv->disconnected = true; + return TRUE; + } else { + return FALSE; + } +} + +G_END_DECLS + +GPlasmaClient * +gplasma_client_new_raw(plasma::PlasmaClient *plasma_client) +{ + auto client = g_object_new(GPLASMA_TYPE_CLIENT, + "client", plasma_client, + NULL); + return GPLASMA_CLIENT(client); +} + +plasma::PlasmaClient * +gplasma_client_get_raw(GPlasmaClient *client) +{ + auto priv = GPLASMA_CLIENT_GET_PRIVATE(client); + return priv->client; +} diff --git a/src/arrow/c_glib/plasma-glib/client.h b/src/arrow/c_glib/plasma-glib/client.h new file mode 100644 index 000000000..2cb983e14 --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/client.h @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <plasma-glib/object.h> + +G_BEGIN_DECLS + +#define GPLASMA_TYPE_CLIENT_OPTIONS (gplasma_client_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaClientOptions, + gplasma_client_options, + GPLASMA, + CLIENT_OPTIONS, + GObject) + +struct _GPlasmaClientOptionsClass +{ + GObjectClass parent_class; +}; + +GPlasmaClientOptions *gplasma_client_options_new(void); +void +gplasma_client_options_set_n_retries(GPlasmaClientOptions *options, + gint n_retries); +gint +gplasma_client_options_get_n_retries(GPlasmaClientOptions *options); + + +#define GPLASMA_TYPE_CLIENT_CREATE_OPTIONS \ + (gplasma_client_create_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaClientCreateOptions, + gplasma_client_create_options, + GPLASMA, + CLIENT_CREATE_OPTIONS, + GObject) + +struct _GPlasmaClientCreateOptionsClass +{ + GObjectClass parent_class; +}; + +GPlasmaClientCreateOptions *gplasma_client_create_options_new(void); +void +gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options, + const guint8 *metadata, + gsize size); +const guint8 * +gplasma_client_create_options_get_metadata(GPlasmaClientCreateOptions *options, + gsize *size); + + +#define GPLASMA_TYPE_CLIENT (gplasma_client_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaClient, + gplasma_client, + GPLASMA, + CLIENT, + GObject) + +struct _GPlasmaClientClass +{ + GObjectClass parent_class; +}; + +GPlasmaClient *gplasma_client_new(const gchar *store_socket_name, + GPlasmaClientOptions *options, + GError **error); +GPlasmaCreatedObject * 
+gplasma_client_create(GPlasmaClient *client, + GPlasmaObjectID *id, + gsize data_size, + GPlasmaClientCreateOptions *options, + GError **error); +GPlasmaReferredObject * +gplasma_client_refer_object(GPlasmaClient *client, + GPlasmaObjectID *id, + gint64 timeout_ms, + GError **error); +gboolean gplasma_client_disconnect(GPlasmaClient *client, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/plasma-glib/client.hpp b/src/arrow/c_glib/plasma-glib/client.hpp new file mode 100644 index 000000000..d3e2ab259 --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/client.hpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <plasma/client.h> + +#include <plasma-glib/client.h> + +GPlasmaClient * +gplasma_client_new_raw(plasma::PlasmaClient *plasma_client); +plasma::PlasmaClient * +gplasma_client_get_raw(GPlasmaClient *client); diff --git a/src/arrow/c_glib/plasma-glib/meson.build b/src/arrow/c_glib/plasma-glib/meson.build new file mode 100644 index 000000000..61ce69d1e --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/meson.build @@ -0,0 +1,107 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +project_name = 'plasma-glib' + +sources = files( + 'client.cpp', + 'object.cpp', +) + +c_headers = files( + 'client.h', + 'object.h', + 'plasma-glib.h', +) + +cpp_headers = files( + 'client.hpp', + 'object.hpp', + 'plasma-glib.hpp', +) + +headers = c_headers + cpp_headers +install_headers(headers, subdir: project_name) + + +dependencies = [ + plasma, + arrow_glib, +] +cpp_args = [ + '-DG_LOG_DOMAIN="Plasma"', +] +pkg_config_requires = [ + 'plasma', + 'arrow-glib', +] +if have_gi + gir_dependencies = [ + declare_dependency(sources: arrow_glib_gir), + ] + gir_includes = [ + 'Arrow-1.0', + ] + gir_extra_args = [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ] +endif +if arrow_cuda.found() + dependencies += [arrow_cuda_glib] + cpp_args += ['-DHAVE_ARROW_CUDA'] + pkg_config_requires += ['arrow-cuda-glib'] + if have_gi + gir_dependencies += [declare_dependency(sources: arrow_cuda_glib_gir)] + gir_includes += ['ArrowCUDA-1.0'] + gir_extra_args += ['--include-uninstalled=./arrow-cuda-glib/ArrowCUDA-1.0.gir'] + endif +endif +libplasma_glib = library('plasma-glib', + sources: sources, + install: true, + dependencies: dependencies, + include_directories: base_include_directories, + cpp_args: cpp_args, + soversion: so_version, + version: library_version) +plasma_glib = declare_dependency(link_with: libplasma_glib, + include_directories: base_include_directories, + dependencies: dependencies) + +pkgconfig.generate(libplasma_glib, + filebase: project_name, + name: 'Apache Arrow Plasma GLib', + description: 'C API for Apache Arrow Plasma based on GLib', + version: version, + requires: pkg_config_requires) + +if have_gi + gnome.generate_gir(libplasma_glib, + dependencies: gir_dependencies, + sources: sources + c_headers, + namespace: 'Plasma', + nsversion: api_version, + identifier_prefix: 'GPlasma', + symbol_prefix: 'gplasma', + export_packages: 'plasma-glib', + includes: gir_includes, + install: true, + extra_args: gir_extra_args) +endif diff --git 
a/src/arrow/c_glib/plasma-glib/object.cpp b/src/arrow/c_glib/plasma-glib/object.cpp new file mode 100644 index 000000000..121afb1cf --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/object.cpp @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <arrow-glib/error.hpp> + +#include <plasma-glib/client.hpp> +#include <plasma-glib/object.hpp> + +G_BEGIN_DECLS + +/** + * SECTION: object + * @section_id: object-classes + * @title: Object related classes + * @include: plasma-glib/plasma-glib.h + * + * #GPlasmaObjectID is a class for an object ID. + * + * #GPlasmaObject is a base class for an object stored in plasma store. + * + * #GPlasmaCreatedObject is a class for a created object. You can + * change data of the object until the object is sealed or aborted. + * + * #GPlasmaReferredObject is a class for a referred object. You can + * only refer to the data and metadata of the object. You can't change + * the data of the object.
+ * + * Since: 0.12.0 + */ + +typedef struct GPlasmaObjectIDPrivate_ { + plasma::ObjectID id; +} GPlasmaObjectIDPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaObjectID, + gplasma_object_id, + G_TYPE_OBJECT) + +#define GPLASMA_OBJECT_ID_GET_PRIVATE(object) \ + static_cast<GPlasmaObjectIDPrivate *>( \ + gplasma_object_id_get_instance_private( \ + GPLASMA_OBJECT_ID(object))) + +static void +gplasma_object_id_init(GPlasmaObjectID *object) +{ +} + +static void +gplasma_object_id_class_init(GPlasmaObjectIDClass *klass) +{ +} + +/** + * gplasma_object_id_new: + * @id: (array length=size): The raw ID bytes. + * @size: The number of bytes of the ID. It must be 1..20. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GPlasmaObjectID on success, + * %NULL on error. + * + * Since: 0.12.0 + */ +GPlasmaObjectID * +gplasma_object_id_new(const guint8 *id, + gsize size, + GError **error) +{ + if (size == 0 || size > plasma::kUniqueIDSize) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[plasma][object-id][new] " + "ID must be 1..20 bytes: <%" G_GSIZE_FORMAT ">", + size); + return NULL; + } + + auto object_id = g_object_new(GPLASMA_TYPE_OBJECT_ID, NULL); + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(object_id); + memcpy(priv->id.mutable_data(), id, size); + if (size != plasma::kUniqueIDSize) { + memset(priv->id.mutable_data() + size, 0, plasma::kUniqueIDSize - size); + } + return GPLASMA_OBJECT_ID(object_id); +} + +/** + * gplasma_object_id_to_binary: + * @id: A #GPlasmaObjectID. + * @size: (nullable) (out): The number of bytes of the byte string of + * the object ID. It's always 20. 20 is `plasma::kUniqueIDSize`. + * + * Returns: (array length=size): The byte string of the object ID. 
+ * + * Since: 0.12.0 + */ +const guint8 * +gplasma_object_id_to_binary(GPlasmaObjectID *id, + gsize *size) +{ + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id); + if (size) { + *size = plasma::kUniqueIDSize; + } + return priv->id.data(); +} + +/** + * gplasma_object_id_to_hex: + * @id: A #GPlasmaObjectID. + * + * Returns: (transfer full): The hex representation of the object ID. + * + * It should be freed with g_free() when no longer needed. + * + * Since: 0.12.0 + */ +gchar * +gplasma_object_id_to_hex(GPlasmaObjectID *id) +{ + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id); + return g_strdup(priv->id.hex().c_str()); +} + +typedef struct GPlasmaObjectPrivate_ { + GPlasmaClient *client; + GPlasmaObjectID *id; + std::shared_ptr<arrow::Buffer> raw_data; + GArrowBuffer *data; + std::shared_ptr<arrow::Buffer> raw_metadata; + GArrowBuffer *metadata; + gint gpu_device; +} GPlasmaObjectPrivate; + +enum { + PROP_CLIENT = 1, + PROP_ID, + PROP_RAW_DATA, + PROP_DATA, + PROP_RAW_METADATA, + PROP_METADATA, + PROP_GPU_DEVICE +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaObject, + gplasma_object, + G_TYPE_OBJECT) + +#define GPLASMA_OBJECT_GET_PRIVATE(object) \ + static_cast<GPlasmaObjectPrivate *>( \ + gplasma_object_get_instance_private( \ + GPLASMA_OBJECT(object))) + +static void +gplasma_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + // Properties except priv->id must be disposed in subclass. 
+ + if (priv->id) { + g_object_unref(priv->id); + priv->id = nullptr; + } + + G_OBJECT_CLASS(gplasma_object_parent_class)->dispose(object); +} + +static void +gplasma_object_finalize(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + priv->raw_data.~shared_ptr(); + priv->raw_metadata.~shared_ptr(); + + G_OBJECT_CLASS(gplasma_object_parent_class)->finalize(object); +} + +static void +gplasma_object_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + priv->client = GPLASMA_CLIENT(g_value_dup_object(value)); + break; + case PROP_ID: + priv->id = GPLASMA_OBJECT_ID(g_value_dup_object(value)); + break; + case PROP_RAW_DATA: + { + /* The pointer is NULL when the caller omitted "raw-data"; guard it like "raw-metadata". */ + auto raw_data = + static_cast<std::shared_ptr<arrow::Buffer> *>(g_value_get_pointer(value)); + if (raw_data) { + priv->raw_data = *raw_data; + } else { + priv->raw_data = nullptr; + } + } + break; + case PROP_DATA: + priv->data = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_RAW_METADATA: + { + auto raw_metadata = + static_cast<std::shared_ptr<arrow::Buffer> *>(g_value_get_pointer(value)); + if (raw_metadata) { + priv->raw_metadata = *raw_metadata; + } else { + priv->raw_metadata = nullptr; + } + } + break; + case PROP_METADATA: + priv->metadata = GARROW_BUFFER(g_value_dup_object(value)); + break; + case PROP_GPU_DEVICE: + priv->gpu_device = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_object_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CLIENT: + g_value_set_object(value, priv->client); + break; + case PROP_ID: + g_value_set_object(value, priv->id); + break; + case PROP_DATA: + g_value_set_object(value, priv->data); + break; + case PROP_METADATA: + g_value_set_object(value, priv->metadata); + break; + case PROP_GPU_DEVICE: +
g_value_set_int(value, priv->gpu_device); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_object_init(GPlasmaObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + new(&priv->raw_data) std::shared_ptr<arrow::Buffer>; + new(&priv->raw_metadata) std::shared_ptr<arrow::Buffer>; +} + +static void +gplasma_object_class_init(GPlasmaObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_object_dispose; + gobject_class->finalize = gplasma_object_finalize; + gobject_class->set_property = gplasma_object_set_property; + gobject_class->get_property = gplasma_object_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("client", + "Client", + "The client", + GPLASMA_TYPE_CLIENT, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CLIENT, spec); + + spec = g_param_spec_object("id", + "ID", + "The ID of this object", + GPLASMA_TYPE_OBJECT_ID, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ID, spec); + + spec = g_param_spec_pointer("raw-data", + "Raw data", + "The raw data of this object", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW_DATA, spec); + + spec = g_param_spec_object("data", + "Data", + "The data of this object", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA, spec); + + spec = g_param_spec_pointer("raw-metadata", + "Raw metadata", + "The raw metadata of this object", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW_METADATA, spec); + + spec = g_param_spec_object("metadata", + "Metadata", + 
"The metadata of this object", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_METADATA, spec); + + spec = g_param_spec_int("gpu-device", + "GPU device", + "The GPU device number. -1 means GPU isn't used.", + -1, + G_MAXINT, + -1, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_GPU_DEVICE, spec); +} + +static bool +gplasma_object_check_not_released(GPlasmaObjectPrivate *priv, + GError **error, + const gchar *context) +{ + if (priv->client) { + return true; + } + + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto id_hex = id_priv->id.hex(); + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: Can't process released object: <%s>", + context, + id_hex.c_str()); + return false; +} + +static void +gplasma_object_release_resources(GPlasmaObjectPrivate *priv) +{ + if (priv->client) { + g_object_unref(priv->client); + priv->client = nullptr; + } + + if (priv->data) { + g_object_unref(priv->data); + priv->data = nullptr; + } + + if (priv->metadata) { + g_object_unref(priv->metadata); + priv->metadata = nullptr; + } +} + +G_DEFINE_TYPE(GPlasmaCreatedObject, + gplasma_created_object, + GPLASMA_TYPE_OBJECT) + +static void +gplasma_created_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + if (priv->client) { + gplasma_created_object_abort(GPLASMA_CREATED_OBJECT(object), NULL); + } + + G_OBJECT_CLASS(gplasma_created_object_parent_class)->dispose(object); +} + +static void +gplasma_created_object_init(GPlasmaCreatedObject *object) +{ +} + +static void +gplasma_created_object_class_init(GPlasmaCreatedObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_created_object_dispose; +} + +/** + * gplasma_created_object_seal: + * @object: A #GPlasmaCreatedObject. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Seals the object in the object store. You can't use the sealed + * object anymore. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 0.12.0 + */ +gboolean +gplasma_created_object_seal(GPlasmaCreatedObject *object, + GError **error) +{ + const auto context = "[plasma][created-object][seal]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + auto plasma_client = gplasma_client_get_raw(priv->client); + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto status = plasma_client->Seal(id_priv->id); + auto success = garrow_error_check(error, status, context); + if (success) { + status = plasma_client->Release(id_priv->id); + success = garrow_error_check(error, status, context); + gplasma_object_release_resources(priv); + } + return success; +} + +/** + * gplasma_created_object_abort: + * @object: A #GPlasmaCreatedObject. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Aborts the object in the object store. You can't use the aborted + * object anymore. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 0.12.0 + */ +gboolean +gplasma_created_object_abort(GPlasmaCreatedObject *object, + GError **error) +{ + const auto context = "[plasma][created-object][abort]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + auto plasma_client = gplasma_client_get_raw(priv->client); + auto id_priv = GPLASMA_OBJECT_ID_GET_PRIVATE(priv->id); + auto status = plasma_client->Release(id_priv->id); + auto success = garrow_error_check(error, status, context); + if (success) { + status = plasma_client->Abort(id_priv->id); + success = garrow_error_check(error, status, context); + gplasma_object_release_resources(priv); + } + return success; +} + + +G_DEFINE_TYPE(GPlasmaReferredObject, + gplasma_referred_object, + GPLASMA_TYPE_OBJECT) + +static void +gplasma_referred_object_dispose(GObject *object) +{ + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + + gplasma_object_release_resources(priv); + + G_OBJECT_CLASS(gplasma_referred_object_parent_class)->dispose(object); +} + +static void +gplasma_referred_object_init(GPlasmaReferredObject *object) +{ +} + +static void +gplasma_referred_object_class_init(GPlasmaReferredObjectClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gplasma_referred_object_dispose; +} + +/** + * gplasma_referred_object_release: + * @object: A #GPlasmaReferredObject. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Releases the object explicitly. The object is no longer valid. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 0.12.0 + */ +gboolean +gplasma_referred_object_release(GPlasmaReferredObject *object, + GError **error) +{ + const auto context = "[plasma][referred-object][release]"; + + auto priv = GPLASMA_OBJECT_GET_PRIVATE(object); + if (!gplasma_object_check_not_released(priv, error, context)) { + return FALSE; + } + + gplasma_object_release_resources(priv); + return TRUE; +} + +G_END_DECLS + +plasma::ObjectID +gplasma_object_id_get_raw(GPlasmaObjectID *id) +{ + auto priv = GPLASMA_OBJECT_ID_GET_PRIVATE(id); + return priv->id; +} + +GPlasmaCreatedObject * +gplasma_created_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + std::shared_ptr<arrow::Buffer> *raw_data, + GArrowBuffer *data, + std::shared_ptr<arrow::Buffer> *raw_metadata, + GArrowBuffer *metadata, + gint gpu_device) +{ + auto object = g_object_new(GPLASMA_TYPE_CREATED_OBJECT, + "client", client, + "id", id, + "raw-data", raw_data, + "data", data, + "raw-metadata", raw_metadata, + "metadata", metadata, + "gpu-device", gpu_device, + NULL); + return GPLASMA_CREATED_OBJECT(object); +} + +GPlasmaReferredObject * +gplasma_referred_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + std::shared_ptr<arrow::Buffer> *raw_data, + GArrowBuffer *data, + std::shared_ptr<arrow::Buffer> *raw_metadata, + GArrowBuffer *metadata, + gint gpu_device) +{ + auto object = g_object_new(GPLASMA_TYPE_REFERRED_OBJECT, + "client", client, + "id", id, + "raw-data", raw_data, + "data", data, + "raw-metadata", raw_metadata, + "metadata", metadata, + "gpu-device", gpu_device, + NULL); + return GPLASMA_REFERRED_OBJECT(object); +} diff --git a/src/arrow/c_glib/plasma-glib/object.h b/src/arrow/c_glib/plasma-glib/object.h new file mode 100644 index 000000000..46547d37b --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/object.h @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/buffer.h> + +G_BEGIN_DECLS + +#define GPLASMA_TYPE_OBJECT_ID (gplasma_object_id_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaObjectID, + gplasma_object_id, + GPLASMA, + OBJECT_ID, + GObject) + +struct _GPlasmaObjectIDClass +{ + GObjectClass parent_class; +}; + +GPlasmaObjectID *gplasma_object_id_new(const guint8 *id, + gsize size, + GError **error); +const guint8 *gplasma_object_id_to_binary(GPlasmaObjectID *id, + gsize *size); +gchar *gplasma_object_id_to_hex(GPlasmaObjectID *id); + +#define GPLASMA_TYPE_OBJECT (gplasma_object_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaObject, + gplasma_object, + GPLASMA, + OBJECT, + GObject) + +struct _GPlasmaObjectClass +{ + GObjectClass parent_class; +}; + +#define GPLASMA_TYPE_CREATED_OBJECT (gplasma_created_object_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaCreatedObject, + gplasma_created_object, + GPLASMA, + CREATED_OBJECT, + GPlasmaObject) + +struct _GPlasmaCreatedObjectClass +{ + GPlasmaObjectClass parent_class; +}; + +gboolean gplasma_created_object_seal(GPlasmaCreatedObject *object, + GError **error); +gboolean gplasma_created_object_abort(GPlasmaCreatedObject *object, + GError **error); + +#define GPLASMA_TYPE_REFERRED_OBJECT (gplasma_referred_object_get_type()) 
+G_DECLARE_DERIVABLE_TYPE(GPlasmaReferredObject, + gplasma_referred_object, + GPLASMA, + REFERRED_OBJECT, + GPlasmaObject) + +struct _GPlasmaReferredObjectClass +{ + GPlasmaObjectClass parent_class; +}; + +gboolean gplasma_referred_object_release(GPlasmaReferredObject *object, + GError **error); + +G_END_DECLS diff --git a/src/arrow/c_glib/plasma-glib/object.hpp b/src/arrow/c_glib/plasma-glib/object.hpp new file mode 100644 index 000000000..9c18c77ea --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/object.hpp @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include <plasma/client.h> + +#include <plasma-glib/client.h> + +/* C API declarations (GPlasmaObjectID, GPlasmaCreatedObject, + * GPlasmaReferredObject) used by the prototypes below. */ +#include <plasma-glib/object.h> + +plasma::ObjectID +gplasma_object_id_get_raw(GPlasmaObjectID *id); + +GPlasmaCreatedObject * +gplasma_created_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + std::shared_ptr<arrow::Buffer> *raw_data, + GArrowBuffer *data, + std::shared_ptr<arrow::Buffer> *raw_metadata, + GArrowBuffer *metadata, + gint gpu_device); + +GPlasmaReferredObject * +gplasma_referred_object_new_raw(GPlasmaClient *client, + GPlasmaObjectID *id, + std::shared_ptr<arrow::Buffer> *raw_data, + GArrowBuffer *data, + std::shared_ptr<arrow::Buffer> *raw_metadata, + GArrowBuffer *metadata, + gint gpu_device); diff --git a/src/arrow/c_glib/plasma-glib/plasma-glib.h b/src/arrow/c_glib/plasma-glib/plasma-glib.h new file mode 100644 index 000000000..2a6dd76ca --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/plasma-glib.h @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +#pragma once + +#include <plasma-glib/client.h> +#include <plasma-glib/object.h> diff --git a/src/arrow/c_glib/plasma-glib/plasma-glib.hpp b/src/arrow/c_glib/plasma-glib/plasma-glib.hpp new file mode 100644 index 000000000..b2958c28f --- /dev/null +++ b/src/arrow/c_glib/plasma-glib/plasma-glib.hpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <plasma-glib/plasma-glib.h> + +#include <plasma-glib/client.hpp> +#include <plasma-glib/object.hpp> diff --git a/src/arrow/c_glib/test/dataset/test-file-format.rb b/src/arrow/c_glib/test/dataset/test-file-format.rb new file mode 100644 index 000000000..76ffede94 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-file-format.rb @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetFileFormat < Test::Unit::TestCase + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + end + + def test_csv + assert_equal("csv", ArrowDataset::CSVFileFormat.new.type_name) + end + + def test_ipc + assert_equal("ipc", ArrowDataset::IPCFileFormat.new.type_name) + end + + def test_parquet + assert_equal("parquet", ArrowDataset::ParquetFileFormat.new.type_name) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-file-system-dataset-factory.rb b/src/arrow/c_glib/test/dataset/test-file-system-dataset-factory.rb new file mode 100644 index 000000000..bca9e7241 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-file-system-dataset-factory.rb @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDatasetFileSystemDatasetFactory < Test::Unit::TestCase + include Helper::Buildable + include Helper::Writable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + @format = ArrowDataset::IPCFileFormat.new + @path1 = File.join(@dir, "table1.arrow") + @table1 = build_table(visible: [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false, true]), + ], + point: [ + build_int32_array([1, 2, 3]), + build_int32_array([-1, -2, -3, -4]), + ]) + write_table(@table1, @path1) + @path2 = File.join(@dir, "table2.arrow") + @table2 = build_table(visible: [ + build_boolean_array([false, true]), + build_boolean_array([true]), + ], + point: [ + build_int32_array([10]), + build_int32_array([-10, -20]), + ]) + write_table(@table2, @path2) + yield + end + end + + def test_file_system + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system = Arrow::LocalFileSystem.new + factory.add_path(File.expand_path(@path1)) + dataset = factory.finish + assert_equal(@table1, dataset.to_table) + end + + def test_file_system_uri + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(@path1) + dataset = factory.finish + assert_equal(@table1, dataset.to_table) + end + + def test_directory + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(@dir) + dataset = factory.finish + assert_equal(@table1.concatenate([@table2]), + dataset.to_table) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb new file mode 100644 index 000000000..1aef38fcc --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetFileSystemDataset < Test::Unit::TestCase + include Helper::Buildable + include Helper::Readable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + @format = ArrowDataset::IPCFileFormat.new + @factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + @file_system = Arrow::LocalFileSystem.new + @factory.file_system = @file_system + partitioning_schema = build_schema(label: Arrow::StringDataType.new) + @partitioning = + ArrowDataset::DirectoryPartitioning.new(partitioning_schema) + @factory.partitioning = @partitioning + yield + end + end + + def test_type_name + dataset = @factory.finish + assert_equal("filesystem", dataset.type_name) + end + + def test_format + dataset = @factory.finish + assert_equal(@format, dataset.format) + end + + def test_file_system + dataset = @factory.finish + assert_equal(@file_system, dataset.file_system) + end + + def test_partitioning + dataset = @factory.finish + assert_equal(@partitioning, dataset.partitioning) + end + + def test_read_write + table = build_table(label: build_string_array(["a", "a", "b", "c"]), + count: build_int32_array([1, 10, 2, 3])) + table_reader = Arrow::TableBatchReader.new(table) + scanner_builder = 
ArrowDataset::ScannerBuilder.new(table_reader) + scanner_builder.use_async = true + scanner = scanner_builder.finish + options = ArrowDataset::FileSystemDatasetWriteOptions.new + options.file_write_options = @format.default_write_options + options.file_system = Arrow::LocalFileSystem.new + options.base_dir = @dir + options.base_name_template = "{i}.arrow" + options.partitioning = @partitioning + ArrowDataset::FileSystemDataset.write_scanner(scanner, options) + Find.find(@dir) do |path| + @factory.add_path(path) if File.file?(path) + end + @factory.partition_base_dir = @dir + dataset = @factory.finish + assert_equal(build_table(count: [ + build_int32_array([1, 10]), + build_int32_array([2]), + build_int32_array([3]), + ], + label: [ + build_string_array(["a", "a"]), + build_string_array(["b"]), + build_string_array(["c"]), + ]), + dataset.to_table) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-file-writer.rb b/src/arrow/c_glib/test/dataset/test-file-writer.rb new file mode 100644 index 000000000..5b25d6044 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-file-writer.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDatasetFileWriter < Test::Unit::TestCase + include Helper::Buildable + include Helper::Readable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + @format = ArrowDataset::IPCFileFormat.new + @file_system = Arrow::LocalFileSystem.new + @path = File.join(@dir, "data.arrow") + @output = @file_system.open_output_stream(@path) + @schema = build_schema(visible: Arrow::BooleanDataType.new, + point: Arrow::UInt8DataType.new) + @writer = @format.open_writer(@output, + @file_system, + @path, + @schema, + @format.default_write_options) + yield + end + end + + def test_write_record_batch + record_batch = build_record_batch( + visible: build_boolean_array([true, false, true]), + point: build_uint8_array([1, 2, 3])) + @writer.write_record_batch(record_batch) + @writer.finish + @output.close + read_table(@path) do |written_table| + assert_equal(Arrow::Table.new(record_batch.schema, + [record_batch]), + written_table) + end + end + + def test_write_record_batch_reader + table = build_table(visible: build_boolean_array([true, false, true]), + point: build_uint8_array([1, 2, 3])) + @writer.write_record_batch_reader(Arrow::TableBatchReader.new(table)) + @writer.finish + @output.close + read_table(@path) do |written_table| + assert_equal(table, written_table) + end + end +end diff --git a/src/arrow/c_glib/test/dataset/test-partitioning-options.rb b/src/arrow/c_glib/test/dataset/test-partitioning-options.rb new file mode 100644 index 000000000..9ff585aa7 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-partitioning-options.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetPartitioningOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + @options = ArrowDataset::PartitioningOptions.new + end + + def test_infer_dictionary + assert_false(@options.infer_dictionary?) + @options.infer_dictionary = true + assert_true(@options.infer_dictionary?) + end + + def test_schema + assert_nil(@options.schema) + schema = build_schema(year: Arrow::UInt16DataType.new) + @options.schema = schema + assert_equal(schema, @options.schema) + end + + def test_segment_encoding + assert_equal(ArrowDataset::SegmentEncoding::NONE, + @options.segment_encoding) + @options.segment_encoding = :uri + assert_equal(ArrowDataset::SegmentEncoding::URI, + @options.segment_encoding) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-partitioning.rb b/src/arrow/c_glib/test/dataset/test-partitioning.rb new file mode 100644 index 000000000..2b33b1eaa --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-partitioning.rb @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetPartitioning < Test::Unit::TestCase + include Helper::Buildable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + end + + def test_default + assert_equal("default", ArrowDataset::Partitioning.new.type_name) + end + + def test_directory + schema = build_schema(year: Arrow::UInt16DataType.new) + partitioning = ArrowDataset::DirectoryPartitioning.new(schema) + assert_equal("directory", partitioning.type_name) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-scanner-builder.rb b/src/arrow/c_glib/test/dataset/test-scanner-builder.rb new file mode 100644 index 000000000..5674db4c3 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-scanner-builder.rb @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetScannerBuilder < Test::Unit::TestCase + include Helper::Buildable + include Helper::Writable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + path = File.join(tmpdir, "table.arrow") + @table = build_table(visible: [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false, true]), + ], + point: [ + build_int32_array([1, 2, 3]), + build_int32_array([-1, -2, -3, -4]), + ]) + @format = ArrowDataset::IPCFileFormat.new + write_table(@table, path) + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(path) + @dataset = factory.finish + @builder = @dataset.begin_scan + yield + end + end + + def test_new_record_batch_reader + reader = Arrow::TableBatchReader.new(@table) + builder = ArrowDataset::ScannerBuilder.new(reader) + scanner = builder.finish + assert_equal(@table, scanner.to_table) + end + + def test_filter + visible = Arrow::FieldExpression.new("visible") + true_scalar = Arrow::BooleanScalar.new(true) + true_datum = Arrow::ScalarDatum.new(true_scalar) + true_literal = Arrow::LiteralExpression.new(true_datum) + filter = Arrow::CallExpression.new("equal", [visible, true_literal]) + @builder.filter = filter + scanner = @builder.finish + assert_equal(build_table(visible: [ + build_boolean_array([true, true]), + build_boolean_array([true, true]), + ], + point: [ + build_int32_array([1, 3]), + build_int32_array([-2, -4]), + ]), + scanner.to_table) + end + + def test_use_async + @builder.use_async = true 
+ scanner = @builder.finish + assert_equal(@table, scanner.to_table) + end +end diff --git a/src/arrow/c_glib/test/dataset/test-scanner.rb b/src/arrow/c_glib/test/dataset/test-scanner.rb new file mode 100644 index 000000000..f7702d490 --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-scanner.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDatasetScanner < Test::Unit::TestCase + include Helper::Buildable + include Helper::Writable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + path = File.join(tmpdir, "table.arrow") + @table = build_table(visible: [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false, true]), + ], + point: [ + build_int32_array([1, 2, 3]), + build_int32_array([-1, -2, -3, -4]), + ]) + @format = ArrowDataset::IPCFileFormat.new + write_table(@table, path) + factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + factory.file_system_uri = build_file_uri(path) + @dataset = factory.finish + builder = @dataset.begin_scan + @scanner = builder.finish + yield + end + end + + def test_to_table + assert_equal(@table, @scanner.to_table) + end +end diff --git a/src/arrow/c_glib/test/file-system-tests.rb b/src/arrow/c_glib/test/file-system-tests.rb new file mode 100644 index 000000000..3c9db8266 --- /dev/null +++ b/src/arrow/c_glib/test/file-system-tests.rb @@ -0,0 +1,383 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Shared test cases for file system implementations.
#
# The including test case must set @fs to the file system under test before
# each test runs; every helper and test below goes through @fs.
module FileSystemTests
  # Returns a Hash mapping every path found by a recursive selector rooted
  # at "" to its type nick as a Symbol (the tests expect :dir and :file).
  private def all_entries
    selector = Arrow::FileSelector.new
    selector.base_dir = ""
    selector.recursive = true
    infos = @fs.get_file_infos_selector(selector)
    infos.map {|info| [info.path, info.type.nick.to_sym]}.to_h
  end

  # Creates the directory `path` with the recursive flag set.
  private def mkpath(path)
    @fs.create_dir(path, true)
  end

  # Creates the file `path`, writing `content` when it is given.
  #
  # The stream is closed in `ensure` so that a failing write does not leak
  # the open stream into the following tests.
  private def create_file(path, content=nil)
    stream = @fs.open_output_stream(path)
    begin
      stream.write(content) if content
    ensure
      stream.close
    end
  end

  # Reads the whole file `path` and returns its content as a String.
  #
  # The size is taken from the file info, and the stream is closed in
  # `ensure` even when looking up the size or reading fails.
  private def read_file(path)
    stream = @fs.open_input_stream(path)
    begin
      size = @fs.get_file_info(path).size
      bytes = stream.read_bytes(size)
    ensure
      stream.close
    end
    bytes.to_s
  end

  # True when `path` is reported as a file; an Arrow::Error::Io from the
  # lookup counts as "not a file".
  private def file?(path)
    info = @fs.get_file_info(path)
    info.file?
  rescue Arrow::Error::Io
    false
  end

  # True when `path` is reported as a directory; an Arrow::Error::Io from
  # the lookup counts as "not a directory".
  private def directory?(path)
    info = @fs.get_file_info(path)
    info.dir?
  rescue Arrow::Error::Io
    false
  end

  def test_empty
    assert { all_entries.empty? }
  end

  def test_create_dir
    @fs.create_dir("AB/CD/EF", true) # recursive
    @fs.create_dir("AB/GH", false) # non-recursive
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "AB/CD/EF" => :dir,
                   "AB/GH" => :dir
                 },
                 all_entries)
  end

  def test_create_dir_with_nonexistent_parent
    assert_raise(Arrow::Error::Io) do
      @fs.create_dir("AB/GH/IJ", false) # non-recursive, parent doesn't exist
    end
    assert_equal({},
                 all_entries)
  end

  def test_create_dir_under_file
    create_file("empty_file")
    assert_raise(Arrow::Error::Io) do
      @fs.create_dir(File.join("empty_file", "XY"), true)
    end
    assert_equal({"empty_file" => :file},
                 all_entries)
  end

  def test_delete_dir
    mkpath("AB/CD/EF")
    mkpath("AB/GH/IJ")
    create_file("AB/abc")
    create_file("AB/CD/def")
    create_file("AB/CD/EF/ghi")

    @fs.delete_dir("AB/CD")
    @fs.delete_dir("AB/GH/IJ")

    assert_equal({
                   "AB" => :dir,
                   "AB/GH" => :dir,
                   "AB/abc" => :file
                 },
                 all_entries)
  end

  def test_delete_dir_contents
    mkpath("AB/CD/EF")
    mkpath("AB/GH/IJ")
    create_file("AB/abc")
    create_file("AB/CD/def")
    create_file("AB/CD/EF/ghi")

    @fs.delete_dir_contents("AB/CD")
    @fs.delete_dir_contents("AB/GH/IJ")

    # The emptied directories themselves must survive.
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "AB/GH" => :dir,
                   "AB/GH/IJ" => :dir,
                   "AB/abc" => :file
                 },
                 all_entries)
  end

  def test_delete_file
    mkpath("AB")
    create_file("AB/def")
    assert { file?("AB/def") }

    @fs.delete_file("AB/def")
    assert { not file?("AB/def") }
  end

  def test_delete_files
    mkpath("AB")
    create_file("abc")
    {
      def: 123,
      ghi: 456,
      jkl: 789,
      mno: 789
    }.each do |name, content|
      create_file(File.join("AB", name.to_s), content.to_s)
    end

    assert_equal({
                   "AB" => :dir,
                   "AB/def" => :file,
                   "AB/ghi" => :file,
                   "AB/jkl" => :file,
                   "AB/mno" => :file,
                   "abc" => :file
                 },
                 all_entries)

    @fs.delete_files(["abc", "AB/def"])
    assert_equal({
                   "AB" => :dir,
                   "AB/ghi" => :file,
                   "AB/jkl" => :file,
                   "AB/mno" => :file
                 },
                 all_entries)
  end

  def test_move_file
    mkpath("AB/CD")
    mkpath("EF")
    create_file("abc")
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "EF" => :dir,
                   "abc" => :file
                 },
                 all_entries)

    @fs.move("abc", "AB/CD/ghi")
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "EF" => :dir,
                   "AB/CD/ghi" => :file
                 },
                 all_entries)
  end

  # Hook for including test cases: override to return false to have
  # test_move_dir omitted.
  def move_dir_is_supported?
    true
  end

  def test_move_dir
    omit("move_dir is not allowed") unless move_dir_is_supported?

    mkpath("AB/CD")
    mkpath("EF")
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "EF" => :dir
                 },
                 all_entries)

    @fs.move("AB", "GH")
    assert_equal({
                   "EF" => :dir,
                   "GH" => :dir,
                   "GH/CD" => :dir
                 },
                 all_entries)
  end

  def test_copy_file
    mkpath("AB/CD")
    mkpath("EF")
    create_file("AB/abc", "data")
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "EF" => :dir,
                   "AB/abc" => :file
                 },
                 all_entries)

    @fs.copy_file("AB/abc", "def")
    assert_equal({
                   "AB" => :dir,
                   "AB/CD" => :dir,
                   "EF" => :dir,
                   "AB/abc" => :file,
                   "def" => :file
                 },
                 all_entries)
    assert_equal("data",
                 read_file("def"))
  end

  def test_get_file_info
    mkpath("AB/CD")
    create_file("AB/CD/ghi", "some data")

    # A directory is expected to report size -1.
    info = @fs.get_file_info("AB")
    assert_equal(Arrow::FileType::DIR,
                 info.type)
    assert_equal("AB",
                 info.base_name)
    assert_equal(-1,
                 info.size)
    assert do
      info.mtime > 0
    end

    info = @fs.get_file_info("AB/CD/ghi")
    assert_equal(Arrow::FileType::FILE,
                 info.type)
    assert_equal("ghi",
                 info.base_name)
    assert_equal(9,
                 info.size)
    assert do
      info.mtime > 0
    end
  end

  def test_get_file_infos_paths
    mkpath("AB/CD")
    create_file("AB/CD/ghi", "some data")

    infos = @fs.get_file_infos_paths(["AB", "AB/CD/ghi"])
    assert_equal({
                   "AB" => -1,
                   "AB/CD/ghi" => 9
                 },
                 infos.map {|info| [info.path, info.size]}.to_h)
  end

  def test_get_file_infos_selector
    mkpath("AB/CD")
    create_file("abc", "data")
    create_file("AB/def", "some data")
    create_file("AB/CD/ghi", "some other data")

    # A fresh selector is non-recursive: only top-level entries are listed.
    selector = Arrow::FileSelector.new
    infos = @fs.get_file_infos_selector(selector)
    assert_equal({
                   "AB" => -1,
                   "abc" => 4
                 },
                 infos.map {|info| [info.path, info.size]}.to_h)

    selector.base_dir = "AB"
    infos = @fs.get_file_infos_selector(selector)
    assert_equal({
                   "AB/CD" => -1,
                   "AB/def" => 9
                 },
                 infos.map {|info| [info.path, info.size]}.to_h)
  end

  def test_get_file_infos_selector_with_recursion
    mkpath("AB/CD")
    create_file("abc", "data")
    create_file("AB/def", "some data")
    create_file("AB/CD/ghi", "some other data")

    selector = Arrow::FileSelector.new
    selector.recursive = true
    infos = @fs.get_file_infos_selector(selector)
    assert_equal({
                   "AB" => -1,
                   "AB/CD" => -1,
                   "AB/CD/ghi" => 15,
                   "AB/def" => 9,
                   "abc" => 4
                 },
                 infos.map {|info| [info.path, info.size]}.to_h)
  end

  def test_open_output_stream
    assert { not file?("abc") }
    stream = @fs.open_output_stream("abc")
    assert_equal(0, stream.tell)
    stream.write("some ")
    stream.write("data")
    stream.close
    assert { file?("abc") }
    assert_equal("some data",
                 read_file("abc"))

    # Re-opening for output starts from position 0 and replaces the content.
    stream = @fs.open_output_stream("abc")
    assert_equal(0, stream.tell)
    stream.write("other data")
    stream.close
    assert { file?("abc") }
    assert_equal("other data",
                 read_file("abc"))
  end

  def test_open_append_stream
    assert { not file?("abc") }
    stream = @fs.open_append_stream("abc")
    assert_equal(0, stream.tell)
    stream.write("some ")
    stream.close
    assert { file?("abc") }
    assert_equal("some ",
                 read_file("abc"))

    # Re-opening for append starts after the 5 bytes already written.
    stream = @fs.open_append_stream("abc")
    assert_equal(5, stream.tell)
    stream.write("data")
    stream.close
    assert { file?("abc") }
    assert_equal("some data",
                 read_file("abc"))
  end

  def test_open_input_stream
    mkpath("AB")
    create_file("AB/abc", "some data")

    stream = @fs.open_input_stream("AB/abc")
    bytes = stream.read_bytes(4)
    assert_equal("some",
                 bytes.to_s)
    stream.close
  end

  def test_open_input_file
    create_file("ab", "some data")

    stream = @fs.open_input_file("ab")
    bytes = stream.read_at_bytes(5, 4)
    assert_equal("data",
                 bytes.to_s)
    stream.close
  end
end

# diff --git a/src/arrow/c_glib/test/fixture/TestOrcFile.test1.orc b/src/arrow/c_glib/test/fixture/TestOrcFile.test1.orc
# Binary files differ
# new file mode 100644
# index 000000000..4fb0beff8
# --- /dev/null
# +++ b/src/arrow/c_glib/test/fixture/TestOrcFile.test1.orc
# diff --git
# a/src/arrow/c_glib/test/flight/test-client.rb b/src/arrow/c_glib/test/flight/test-client.rb
# new file mode 100644
# index 000000000..f6660a4ca
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-client.rb
# @@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests ArrowFlight::Client against a Helper::FlightServer started on the
# loopback interface.
class TestFlightClient < Test::Unit::TestCase
  include Helper::Omittable

  # Starts the helper server on port 0 and stores the location of the port
  # it actually bound.  @server is reset to nil first so that teardown can
  # tell an omitted setup from a started server.
  def setup
    @server = nil
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
    omit("Unstable on Windows") if Gem.win_platform?
    require_gi_bindings(3, 4, 7)
    @server = Helper::FlightServer.new
    host = "127.0.0.1"
    listen_location = ArrowFlight::Location.new("grpc://#{host}:0")
    @server.listen(ArrowFlight::ServerOptions.new(listen_location))
    @location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}")
  end

  # Shuts the server down only when setup got far enough to create it.
  def teardown
    @server.shutdown unless @server.nil?
  end

  def test_list_flights
    client = ArrowFlight::Client.new(@location)
    generator = Helper::FlightInfoGenerator.new
    expected_flights = [generator.page_view]
    assert_equal(expected_flights, client.list_flights)
  end

  sub_test_case("#do_get") do
    # Fetches the first endpoint's ticket and compares the streamed table
    # with the generator's table.
    def test_success
      client = ArrowFlight::Client.new(@location)
      ticket = client.list_flights.first.endpoints.first.ticket
      generator = Helper::FlightInfoGenerator.new
      reader = client.do_get(ticket)
      expected_table = generator.page_view_table
      assert_equal(expected_table, reader.read_all)
    end

    # A ticket built from "invalid" must be rejected with Arrow::Error::Invalid.
    def test_error
      client = ArrowFlight::Client.new(@location)
      assert_raise(Arrow::Error::Invalid) do
        bad_ticket = ArrowFlight::Ticket.new("invalid")
        client.do_get(bad_ticket)
      end
    end
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-command-descriptor.rb b/src/arrow/c_glib/test/flight/test-command-descriptor.rb
# new file mode 100644
# index 000000000..316973287
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-command-descriptor.rb
# @@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for ArrowFlight::CommandDescriptor.
class TestFlightCommandDescriptor < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  def test_to_s
    rendered = ArrowFlight::CommandDescriptor.new("command").to_s
    assert_equal("FlightDescriptor<cmd = 'command'>", rendered)
  end

  # The reader returns the command string given to the constructor.
  def test_command
    command = "command"
    descriptor = ArrowFlight::CommandDescriptor.new(command)
    assert_equal(command,
                 descriptor.command)
  end

  sub_test_case("#==") do
    def test_true
      left = ArrowFlight::CommandDescriptor.new("command")
      right = ArrowFlight::CommandDescriptor.new("command")
      assert { left == right }
    end

    def test_false
      left = ArrowFlight::CommandDescriptor.new("command1")
      right = ArrowFlight::CommandDescriptor.new("command2")
      assert { !(left == right) }
    end
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-criteria.rb b/src/arrow/c_glib/test/flight/test-criteria.rb
# new file mode 100644
# index 000000000..d5f60a895
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-criteria.rb
# @@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for ArrowFlight::Criteria.
class TestFlightCriteria < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # The expression reader, converted with #to_s, equals the constructor
  # argument.
  def test_expression
    source = "expression"
    criteria = ArrowFlight::Criteria.new(source)
    assert_equal(source, criteria.expression.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-endpoint.rb b/src/arrow/c_glib/test/flight/test-endpoint.rb
# new file mode 100644
# index 000000000..06cddf001
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-endpoint.rb
# @@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for ArrowFlight::Endpoint.
class TestFlightEndpoint < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # The ticket reader returns a ticket equal to the one passed in.
  def test_ticket
    ticket = ArrowFlight::Ticket.new("data")
    locations = [
      "grpc://127.0.0.1:2929",
      "grpc+tcp://127.0.0.1:12929",
    ].map {|uri| ArrowFlight::Location.new(uri)}
    endpoint = ArrowFlight::Endpoint.new(ticket, locations)
    assert_equal(ticket, endpoint.ticket)
  end

  # The locations reader returns locations equal to the ones passed in.
  def test_locations
    ticket = ArrowFlight::Ticket.new("data")
    locations = [
      "grpc://127.0.0.1:2929",
      "grpc+tcp://127.0.0.1:12929",
    ].map {|uri| ArrowFlight::Location.new(uri)}
    endpoint = ArrowFlight::Endpoint.new(ticket, locations)
    assert_equal(locations, endpoint.locations)
  end

  sub_test_case("#==") do
    def test_true
      ticket = ArrowFlight::Ticket.new("data")
      location = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
      left = ArrowFlight::Endpoint.new(ticket, [location])
      right = ArrowFlight::Endpoint.new(ticket, [location])
      assert { left == right }
    end

    # Same ticket, different port in the location.
    def test_false
      ticket = ArrowFlight::Ticket.new("data")
      first_location = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
      second_location = ArrowFlight::Location.new("grpc://127.0.0.1:1129")
      left = ArrowFlight::Endpoint.new(ticket, [first_location])
      right = ArrowFlight::Endpoint.new(ticket, [second_location])
      assert { !(left == right) }
    end
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-info.rb b/src/arrow/c_glib/test/flight/test-info.rb
# new file mode 100644
# index 000000000..5bf0fbfad
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-info.rb
# @@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the flight info object produced by Helper::FlightInfoGenerator.
class TestFlightInfo < Test::Unit::TestCase
  include Helper::Writable

  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
    @generator = Helper::FlightInfoGenerator.new
  end

  sub_test_case("#get_schema") do
    def test_with_options
      info = @generator.page_view
      expected_schema = @generator.page_view_table.schema
      read_options = Arrow::ReadOptions.new
      assert_equal(expected_schema, info.get_schema(read_options))
    end

    def test_without_options
      info = @generator.page_view
      expected_schema = @generator.page_view_table.schema
      assert_equal(expected_schema, info.get_schema)
    end
  end

  def test_descriptor
    info = @generator.page_view
    assert_equal(@generator.page_view_descriptor, info.descriptor)
  end

  def test_endpoints
    info = @generator.page_view
    assert_equal(@generator.page_view_endpoints, info.endpoints)
  end

  def test_total_records
    info = @generator.page_view
    expected_rows = @generator.page_view_table.n_rows
    assert_equal(expected_rows, info.total_records)
  end

  # total_bytes must equal the size of the table written in stream format
  # to a resizable buffer.
  def test_total_bytes
    info = @generator.page_view
    table = @generator.page_view_table
    buffer = Arrow::ResizableBuffer.new(0)
    write_table(table, buffer, type: :stream)
    assert_equal(buffer.size, info.total_bytes)
  end

  # Two infos generated separately compare equal.
  def test_equal
    first = @generator.page_view
    second = @generator.page_view
    assert { first == second }
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-location.rb
# b/src/arrow/c_glib/test/flight/test-location.rb
# new file mode 100644
# index 000000000..5b1679322
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-location.rb
# @@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for ArrowFlight::Location.
class TestFlightLocation < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # to_s gives back the URI the location was built from.
  def test_to_s
    rendered = ArrowFlight::Location.new("grpc://127.0.0.1:2929").to_s
    assert_equal("grpc://127.0.0.1:2929", rendered)
  end

  def test_scheme
    scheme = ArrowFlight::Location.new("grpc://127.0.0.1:2929").scheme
    assert_equal("grpc", scheme)
  end

  def test_equal
    left = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
    right = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
    assert { left == right }
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-path-descriptor.rb b/src/arrow/c_glib/test/flight/test-path-descriptor.rb
# new file mode 100644
# index 000000000..441fc7bb0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-path-descriptor.rb
# @@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for ArrowFlight::PathDescriptor.
class TestFlightPathDescriptor < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # to_s is expected to join the path components with "/".
  def test_to_s
    rendered = ArrowFlight::PathDescriptor.new(["a", "b", "c"]).to_s
    assert_equal("FlightDescriptor<path = 'a/b/c'>", rendered)
  end

  def test_paths
    components = ["a", "b", "c"]
    descriptor = ArrowFlight::PathDescriptor.new(components)
    assert_equal(components,
                 descriptor.paths)
  end

  sub_test_case("#==") do
    def test_true
      left = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
      right = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
      assert { left == right }
    end

    # Components that differ only in letter case are not equal.
    def test_false
      left = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
      right = ArrowFlight::PathDescriptor.new(["A", "B", "C"])
      assert { !(left == right) }
    end
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-server-options.rb b/src/arrow/c_glib/test/flight/test-server-options.rb
# new file mode 100644
# index 000000000..93a90297e
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-server-options.rb
# @@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for ArrowFlight::ServerOptions.
class TestFlightServerOptions < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # The location reader returns a location equal to the constructor argument.
  def test_location
    listen_location = ArrowFlight::Location.new("grpc://127.0.0.1:0")
    server_options = ArrowFlight::ServerOptions.new(listen_location)
    assert_equal(listen_location,
                 server_options.location)
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-stream-reader.rb b/src/arrow/c_glib/test/flight/test-stream-reader.rb
# new file mode 100644
# index 000000000..f2e6229b0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-stream-reader.rb
# @@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the stream reader returned by ArrowFlight::Client#do_get against a
# Helper::FlightServer started on the loopback interface.
class TestFlightStreamReader < Test::Unit::TestCase
  include Helper::Omittable

  # Starts the helper server on port 0, connects a client to the port it
  # bound and requests the generator's page view ticket.  @server is reset
  # to nil first so that teardown can tell an omitted setup from a started
  # server.
  def setup
    @server = nil
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
    omit("Unstable on Windows") if Gem.win_platform?
    require_gi_bindings(3, 4, 5)
    @server = Helper::FlightServer.new
    host = "127.0.0.1"
    listen_location = ArrowFlight::Location.new("grpc://#{host}:0")
    @server.listen(ArrowFlight::ServerOptions.new(listen_location))
    bound_location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}")
    client = ArrowFlight::Client.new(bound_location)
    @generator = Helper::FlightInfoGenerator.new
    @reader = client.do_get(@generator.page_view_ticket)
  end

  # Shuts the server down only when setup got far enough to create it.
  def teardown
    @server.shutdown unless @server.nil?
  end

  # Drains the reader chunk by chunk; the result must be a single chunk
  # holding the table's first record batch and no metadata.
  def test_read_next
    received = []
    loop do
      chunk = @reader.read_next
      break if chunk.nil?
      received << chunk
    end
    actual = received.map do |chunk|
      [chunk.data, chunk.metadata&.data&.to_s]
    end
    batch_reader = Arrow::TableBatchReader.new(@generator.page_view_table)
    expected = [
      [batch_reader.read_next, nil],
    ]
    assert_equal(expected, actual)
  end

  def test_read_all
    expected_table = @generator.page_view_table
    assert_equal(expected_table, @reader.read_all)
  end
end

# diff --git a/src/arrow/c_glib/test/flight/test-ticket.rb b/src/arrow/c_glib/test/flight/test-ticket.rb
# new file mode 100644
# index 000000000..976089762
# --- /dev/null
# +++ b/src/arrow/c_glib/test/flight/test-ticket.rb
# @@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for ArrowFlight::Ticket.
class TestFlightTicket < Test::Unit::TestCase
  def setup
    omit("Arrow Flight is required") unless defined?(ArrowFlight)
  end

  # The data reader, converted with #to_s, equals the constructor argument.
  def test_data
    payload = "data"
    ticket = ArrowFlight::Ticket.new(payload)
    assert_equal(payload, ticket.data.to_s)
  end

  sub_test_case("#==") do
    def test_true
      left = ArrowFlight::Ticket.new("data")
      right = ArrowFlight::Ticket.new("data")
      assert { left == right }
    end

    def test_false
      left = ArrowFlight::Ticket.new("data1")
      right = ArrowFlight::Ticket.new("data2")
      assert { !(left == right) }
    end
  end
end

# diff --git a/src/arrow/c_glib/test/gandiva/test-binary-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-binary-literal-node.rb
# new file mode 100644
# index 000000000..fddf74830
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-binary-literal-node.rb
# @@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Gandiva::BinaryLiteralNode.
class TestGandivaBinaryLiteralNode < Test::Unit::TestCase
  # Stores the five-byte fixture value shared by all tests.
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @value = "\x00\x01\x02\x03\x04"
  end

  sub_test_case(".new") do
    # Constructing from a Ruby String.
    def test_string
      node = Gandiva::BinaryLiteralNode.new(@value)
      assert_equal(@value,
                   node.value.to_s)
    end

    # Constructing from a GLib::Bytes wrapping the same value.
    def test_bytes
      wrapped = GLib::Bytes.new(@value)
      node = Gandiva::BinaryLiteralNode.new(wrapped)
      assert_equal(@value,
                   node.value.to_s)
    end
  end

  sub_test_case("instance methods") do
    # Runs the outer setup first, then builds the node under test.
    def setup
      super
      @node = Gandiva::BinaryLiteralNode.new(@value)
    end

    def test_return_type
      expected_type = Arrow::BinaryDataType.new
      assert_equal(expected_type, @node.return_type)
    end
  end
end

# diff --git a/src/arrow/c_glib/test/gandiva/test-boolean-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-boolean-literal-node.rb
# new file mode 100644
# index 000000000..6e18a7621
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-boolean-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Gandiva::BooleanLiteralNode.
class TestGandivaBooleanLiteralNode < Test::Unit::TestCase
  # Builds a literal node holding `true`.
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @value = true
    @node = Gandiva::BooleanLiteralNode.new(@value)
  end

  # The boolean value is read through the predicate-style reader.
  def test_value
    actual = @node.value?
    assert_equal(@value, actual)
  end

  def test_return_type
    expected_type = Arrow::BooleanDataType.new
    assert_equal(expected_type, @node.return_type)
  end
end

# diff --git a/src/arrow/c_glib/test/gandiva/test-boolean-node.rb b/src/arrow/c_glib/test/gandiva/test-boolean-node.rb
# new file mode 100644
# index 000000000..a14685e4e
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-boolean-node.rb
# @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for the Gandiva boolean nodes (AndNode and OrNode).
class TestGandivaBooleanNode < Test::Unit::TestCase
  # Prepares two int32 field nodes used as children in every test.
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    first_field = Arrow::Field.new("field1", Arrow::Int32DataType.new)
    second_field = Arrow::Field.new("field2", Arrow::Int32DataType.new)
    @field1_node = Gandiva::FieldNode.new(first_field)
    @field2_node = Gandiva::FieldNode.new(second_field)
  end

  # The children reader returns the nodes in construction order.
  def test_and
    children = [@field1_node, @field2_node]
    node = Gandiva::AndNode.new(children)
    assert_equal([@field1_node, @field2_node], node.children)
  end

  def test_or
    children = [@field1_node, @field2_node]
    node = Gandiva::OrNode.new(children)
    assert_equal([@field1_node, @field2_node], node.children)
  end
end

# diff --git a/src/arrow/c_glib/test/gandiva/test-condition.rb b/src/arrow/c_glib/test/gandiva/test-condition.rb
# new file mode 100644
# index 000000000..51fb9f1b1
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-condition.rb
# @@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Gandiva::Condition.
class TestGandivaCondition < Test::Unit::TestCase
  # Builds a condition around an "equal" function node that takes two int32
  # field nodes and is declared to return boolean.
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    first_field = Arrow::Field.new("field1", Arrow::Int32DataType.new)
    second_field = Arrow::Field.new("field2", Arrow::Int32DataType.new)
    first_node = Gandiva::FieldNode.new(first_field)
    second_node = Gandiva::FieldNode.new(second_field)
    operands = [first_node, second_node]
    equal_node = Gandiva::FunctionNode.new("equal",
                                           operands,
                                           Arrow::BooleanDataType.new)
    @condition = Gandiva::Condition.new(equal_node)
  end

  def test_to_s
    rendered = @condition.to_s
    assert_equal("bool equal((int32) field1, (int32) field2)", rendered)
  end
end

# diff --git a/src/arrow/c_glib/test/gandiva/test-double-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-double-literal-node.rb
# new file mode 100644
# index 000000000..27cc3aea2
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-double-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the value and return type exposed by Gandiva::DoubleLiteralNode.
class TestGandivaDoubleLiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = 1.5
    @literal_node = Gandiva::DoubleLiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::DoubleDataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-expression.rb b/src/arrow/c_glib/test/gandiva/test-expression.rb
# new file mode 100644
# index 000000000..2e27d6e67
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-expression.rb
# @@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the readers and string form of a Gandiva::Expression built from an
# "add" function node and a "sum" result field.
class TestGandivaExpression < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    operand_nodes = ["augend", "addend"].map do |field_name|
      Gandiva::FieldNode.new(Arrow::Field.new(field_name,
                                              Arrow::Int32DataType.new))
    end
    @function_node = Gandiva::FunctionNode.new("add",
                                               operand_nodes,
                                               Arrow::Int32DataType.new)
    @sum = Arrow::Field.new("sum", Arrow::Int32DataType.new)
    @expression = Gandiva::Expression.new(@function_node, @sum)
  end

  def test_readers
    expected = [@function_node, @sum]
    actual = [@expression.root_node, @expression.result_field]
    assert_equal(expected, actual)
  end

  def test_to_s
    rendered = @expression.to_s
    assert_equal("int32 add((int32) augend, (int32) addend)", rendered)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-field-node.rb b/src/arrow/c_glib/test/gandiva/test-field-node.rb
# new file mode 100644
# index 000000000..905088128
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-field-node.rb
# @@ -0,0 +1,37 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the field, return type and string form exposed by
# Gandiva::FieldNode for a boolean field named "valid".
class TestGandivaFieldNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @boolean_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
    @field_node = Gandiva::FieldNode.new(@boolean_field)
  end

  def test_field
    assert_equal(@boolean_field, @field_node.field)
  end

  def test_return_type
    expected_type = @boolean_field.data_type
    assert_equal(expected_type, @field_node.return_type)
  end

  def test_to_s
    rendered = @field_node.to_s
    assert_equal("(bool) valid", rendered)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-filter.rb b/src/arrow/c_glib/test/gandiva/test-filter.rb
# new file mode 100644
# index 000000000..3da777431
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-filter.rb
# @@ -0,0 +1,51 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Evaluates a Gandiva::Filter with the condition field1 == field2 against a
# four-row record batch and checks the selected row indices.
class TestGandivaFilter < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)

    fields = ["field1", "field2"].map do |field_name|
      Arrow::Field.new(field_name, Arrow::Int32DataType.new)
    end
    schema = Arrow::Schema.new(fields)
    operand_nodes = fields.map { |field| Gandiva::FieldNode.new(field) }
    equality_node = Gandiva::FunctionNode.new("equal",
                                              operand_nodes,
                                              Arrow::BooleanDataType.new)
    @filter = Gandiva::Filter.new(schema,
                                  Gandiva::Condition.new(equality_node))

    # The two columns agree only at row indices 1 and 3.
    columns = [
      build_int32_array([1, 2, 3, 4]),
      build_int32_array([11, 2, 15, 4]),
    ]
    @record_batch = Arrow::RecordBatch.new(schema,
                                           columns[0].length,
                                           columns)
  end

  def test_evaluate
    n_rows = @record_batch.n_rows
    selected = Gandiva::UInt16SelectionVector.new(n_rows)
    @filter.evaluate(@record_batch, selected)
    expected = build_uint16_array([1, 3])
    assert_equal(expected, selected.to_array)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-float-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-float-literal-node.rb
# new file mode 100644
# index 000000000..4a49eb374
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-float-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the value and return type exposed by Gandiva::FloatLiteralNode.
class TestGandivaFloatLiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = 1.5
    @literal_node = Gandiva::FloatLiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::FloatDataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-function-node.rb b/src/arrow/c_glib/test/gandiva/test-function-node.rb
# new file mode 100644
# index 000000000..cb4fe0a65
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-function-node.rb
# @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks that Gandiva::FunctionNode returns the name, parameters and return
# type it was constructed with.
class TestGandivaFunctionNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
  end

  def test_readers
    parameter_nodes = ["field1", "field2"].map do |field_name|
      Gandiva::FieldNode.new(Arrow::Field.new(field_name,
                                              Arrow::Int32DataType.new))
    end
    int64_type = Arrow::Int64DataType.new
    add_node = Gandiva::FunctionNode.new("add", parameter_nodes, int64_type)

    expected = ["add", parameter_nodes, int64_type]
    actual = [add_node.name, add_node.parameters, add_node.return_type]
    assert_equal(expected, actual)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-function-registry.rb b/src/arrow/c_glib/test/gandiva/test-function-registry.rb
# new file mode 100644
# index 000000000..25bac6673
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-function-registry.rb
# @@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks Gandiva::FunctionRegistry lookups (hit and miss) and the element
# class of its native function list.
class TestGandivaFunctionRegistry < Test::Unit::TestCase
  include Helper::DataType

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @registry = Gandiva::FunctionRegistry.new
  end

  sub_test_case("lookup") do
    def test_found
      # Round-trip: look a registered function up by its own first signature.
      registered = @registry.native_functions[0]
      first_signature = registered.signatures[0]
      assert_equal(registered, @registry.lookup(first_signature))
    end

    def test_not_found
      unknown = Gandiva::FunctionSignature.new("nonexistent",
                                               [],
                                               boolean_data_type)
      found = @registry.lookup(unknown)
      assert_nil(found)
    end
  end

  def test_native_functions
    element_classes = @registry.native_functions.map(&:class).uniq
    assert_equal([Gandiva::NativeFunction], element_classes)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-function-signature.rb b/src/arrow/c_glib/test/gandiva/test-function-signature.rb
# new file mode 100644
# index 000000000..ada3bbc8b
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-function-signature.rb
# @@ -0,0 +1,101 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks construction, equality, string form and readers of
# Gandiva::FunctionSignature.
class TestGandivaFunctionSignature < Test::Unit::TestCase
  include Helper::DataType

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @to_date = Gandiva::FunctionSignature.new("to_date",
                                              [
                                                string_data_type,
                                                string_data_type,
                                                int32_data_type,
                                              ],
                                              date64_data_type)
  end

  # Builds an "add" signature with two parameters and a return type. The
  # block is called once per slot (parameters first, then the return type)
  # to supply each data type.
  def add_signature
    Gandiva::FunctionSignature.new("add", [yield, yield], yield)
  end

  def test_new
    add_int32 = add_signature { int32_data_type }
    assert_equal("int32 add(int32, int32)", add_int32.to_s)
  end

  sub_test_case("equal") do
    def test_true
      left = add_signature { int32_data_type }
      right = add_signature { int32_data_type }
      assert do
        left == right
      end
    end

    def test_false
      int32_variant = add_signature { int32_data_type }
      int16_variant = add_signature { int16_data_type }
      assert do
        int32_variant != int16_variant
      end
    end
  end

  def test_to_string
    rendered = @to_date.to_s
    assert_equal("date64[ms] to_date(string, string, int32)", rendered)
  end

  def test_get_return_type
    actual = @to_date.return_type
    assert_equal(date64_data_type, actual)
  end

  def test_get_base_name
    actual = @to_date.base_name
    assert_equal("to_date", actual)
  end

  def test_get_param_types
    expected = [string_data_type, string_data_type, int32_data_type]
    assert_equal(expected, @to_date.param_types)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-if-node.rb b/src/arrow/c_glib/test/gandiva/test-if-node.rb
# new file mode 100644
# index 000000000..b00359590
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-if-node.rb
# @@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that Gandiva::IfNode returns the condition, then/else nodes and
# return type it was constructed with.
class TestGandivaIfNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    field1 = Arrow::Field.new("field1", Arrow::Int32DataType.new)
    field2 = Arrow::Field.new("field2", Arrow::Int32DataType.new)
    @then_node = Gandiva::FieldNode.new(field1)
    @else_node = Gandiva::FieldNode.new(field2)
    @return_type = Arrow::Int32DataType.new
    # A comparison yields a boolean, so the condition gets its own boolean
    # type instead of reusing the int32 type of the branches.
    @condition_node = Gandiva::FunctionNode.new("greater_than",
                                                [@then_node, @else_node],
                                                Arrow::BooleanDataType.new)
    @if_node = Gandiva::IfNode.new(@condition_node,
                                   @then_node,
                                   @else_node,
                                   @return_type)
  end

  def test_readers
    assert_equal([
                   @condition_node,
                   @then_node,
                   @else_node,
                   @return_type,
                 ],
                 [
                   @if_node.condition_node,
                   @if_node.then_node,
                   @if_node.else_node,
                   @if_node.return_type,
                 ])
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-int16-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-int16-literal-node.rb
# new file mode 100644
# index 000000000..f8e6b2684
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-int16-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the value and return type exposed by Gandiva::Int16LiteralNode,
# using the most negative 16-bit value.
class TestGandivaInt16LiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = -(1 << 15)
    @literal_node = Gandiva::Int16LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::Int16DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-int32-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-int32-literal-node.rb
# new file mode 100644
# index 000000000..3d1bf588c
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-int32-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the value and return type exposed by Gandiva::Int32LiteralNode,
# using the most negative 32-bit value.
class TestGandivaInt32LiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = -(1 << 31)
    @literal_node = Gandiva::Int32LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::Int32DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-int64-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-int64-literal-node.rb
# new file mode 100644
# index 000000000..b2ca3bf63
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-int64-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the value and return type exposed by Gandiva::Int64LiteralNode,
# using the most negative 64-bit value.
class TestGandivaInt64LiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = -(1 << 63)
    @literal_node = Gandiva::Int64LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::Int64DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-int8-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-int8-literal-node.rb
# new file mode 100644
# index 000000000..8d917bd1b
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-int8-literal-node.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the value and return type exposed by Gandiva::Int8LiteralNode,
# using the most negative 8-bit value.
class TestGandivaInt8LiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @literal = -(1 << 7)
    @literal_node = Gandiva::Int8LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::Int8DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-native-function.rb b/src/arrow/c_glib/test/gandiva/test-native-function.rb
# new file mode 100644
# index 000000000..7888f96b6
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-native-function.rb
# @@ -0,0 +1,132 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the readers of Gandiva::NativeFunction objects obtained from a
# Gandiva::FunctionRegistry: signatures, equality, string form, result
# nullability and the need_context / need_function_holder /
# can_return_errors? flags.
class TestGandivaNativeFunction < Test::Unit::TestCase
  include Helper::DataType

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
    @registry = Gandiva::FunctionRegistry.new
    @not = lookup("not", [boolean_data_type], boolean_data_type)
    @isnull = lookup("isnull", [int8_data_type], boolean_data_type)
  end

  # Looks up the native function registered for the given name, parameter
  # types and return type; returns whatever the registry's lookup returns.
  def lookup(name, param_types, return_type)
    signature = Gandiva::FunctionSignature.new(name,
                                               param_types,
                                               return_type)
    @registry.lookup(signature)
  end

  def test_signatures
    assert_equal([Gandiva::FunctionSignature],
                 @not.signatures.collect(&:class).uniq)
  end

  sub_test_case("equal") do
    def test_true
      assert do
        @not == @registry.lookup(@not.signatures[0])
      end
    end

    def test_false
      assert do
        @not != @isnull
      end
    end
  end

  def test_to_string
    modulo = lookup("modulo",
                    [int64_data_type, int64_data_type],
                    int64_data_type)
    assert_equal(modulo.signatures.collect(&:to_s).join(", "),
                 modulo.to_s)
  end

  # Name corrected from the misspelled "get_result_nullbale_type".
  sub_test_case("get_result_nullable_type") do
    def test_if_null
      assert_equal(Gandiva::ResultNullableType::IF_NULL,
                   @not.result_nullable_type)
    end

    def test_never
      assert_equal(Gandiva::ResultNullableType::NEVER,
                   @isnull.result_nullable_type)
    end

    def test_internal
      to_date = lookup("to_date",
                       [string_data_type, string_data_type, int32_data_type],
                       date64_data_type)
      assert_equal(Gandiva::ResultNullableType::INTERNAL,
                   to_date.result_nullable_type)
    end
  end

  sub_test_case("need_context") do
    # The two test names were previously swapped relative to what each one
    # asserts; the assertions themselves are unchanged.
    def test_need
      upper = lookup("upper",
                     [string_data_type],
                     string_data_type)
      assert do
        upper.need_context
      end
    end

    def test_not_need
      assert do
        !@not.need_context
      end
    end
  end

  sub_test_case("need_function_holder") do
    def test_need
      like = lookup("like",
                    [string_data_type, string_data_type],
                    boolean_data_type)
      assert do
        like.need_function_holder
      end
    end

    def test_not_need
      assert do
        !@not.need_function_holder
      end
    end
  end

  sub_test_case("can_return_errors") do
    def test_can
      divide = lookup("divide",
                      [int8_data_type, int8_data_type],
                      int8_data_type)
      assert do
        divide.can_return_errors?
      end
    end

    def test_not_can
      assert do
        !@not.can_return_errors?
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-null-literal-node.rb b/src/arrow/c_glib/test/gandiva/test-null-literal-node.rb
# new file mode 100644
# index 000000000..ae14f3c15
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-null-literal-node.rb
# @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks Gandiva::NullLiteralNode: construction with a null data type is
# expected to raise, and a boolean-typed node reports its return type.
class TestGandivaNullLiteralNode < Test::Unit::TestCase
  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)
  end

  def test_invalid_type
    null_type = Arrow::NullDataType.new
    expected_error = Arrow::Error::Invalid.new(
      "[gandiva][null-literal-node][new] failed to create: <#{null_type}>"
    )
    assert_raise(expected_error) do
      Gandiva::NullLiteralNode.new(null_type)
    end
  end

  def test_return_type
    boolean_type = Arrow::BooleanDataType.new
    null_node = Gandiva::NullLiteralNode.new(boolean_type)
    assert_equal(boolean_type, null_node.return_type)
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-projector.rb b/src/arrow/c_glib/test/gandiva/test-projector.rb
# new file mode 100644
# index 000000000..308e1c3a5
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-projector.rb
# @@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Evaluates a Gandiva::Projector holding "add" and "subtract" expressions
# over two int32 columns and checks both output columns.
class TestGandivaProjector < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)

    fields = ["field1", "field2"].map do |field_name|
      Arrow::Field.new(field_name, Arrow::Int32DataType.new)
    end
    @schema = Arrow::Schema.new(fields)
    @field_node1, @field_node2 = fields.map do |field|
      Gandiva::FieldNode.new(field)
    end

    # One expression per function; each writes to "<function>_result".
    expressions = ["add", "subtract"].map do |function_name|
      function_node =
        Gandiva::FunctionNode.new(function_name,
                                  [@field_node1, @field_node2],
                                  Arrow::Int32DataType.new)
      result_field = Arrow::Field.new("#{function_name}_result",
                                      Arrow::Int32DataType.new)
      Gandiva::Expression.new(function_node, result_field)
    end
    @projector = Gandiva::Projector.new(@schema, expressions)

    columns = [
      build_int32_array([1, 2, 3, 4]),
      build_int32_array([11, 13, 15, 17]),
    ]
    @record_batch = Arrow::RecordBatch.new(@schema,
                                           columns[0].length,
                                           columns)
  end

  def test_evaluate
    projected = @projector.evaluate(@record_batch)
    sums = [12, 15, 18, 21]
    differences = [-10, -11, -12, -13]
    assert_equal([sums, differences], projected.map(&:values))
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-selectable-projector.rb b/src/arrow/c_glib/test/gandiva/test-selectable-projector.rb
# new file mode 100644
# index 000000000..47b0059a2
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-selectable-projector.rb
# @@ -0,0 +1,74 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Filters a record batch with field1 > 2 into a selection vector, then
# evaluates a Gandiva::SelectableProjector ("add" and "subtract") on the
# selected rows and checks both output columns.
class TestGandivaSelectableProjector < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    omit("Gandiva is required") unless defined?(::Gandiva)

    fields = ["field1", "field2"].map do |field_name|
      Arrow::Field.new(field_name, Arrow::Int32DataType.new)
    end
    @schema = Arrow::Schema.new(fields)

    columns = [
      build_int32_array([1, 2, 3, 4]),
      build_int32_array([11, 13, 15, 17]),
    ]
    @record_batch = Arrow::RecordBatch.new(@schema,
                                           columns[0].length,
                                           columns)

    @field_node1, @field_node2 = fields.map do |field|
      Gandiva::FieldNode.new(field)
    end
    # One expression per function; each writes to "<function>_result".
    expressions = ["add", "subtract"].map do |function_name|
      function_node =
        Gandiva::FunctionNode.new(function_name,
                                  [@field_node1, @field_node2],
                                  Arrow::Int32DataType.new)
      result_field = Arrow::Field.new("#{function_name}_result",
                                      Arrow::Int32DataType.new)
      Gandiva::Expression.new(function_node, result_field)
    end
    n_rows = @record_batch.n_rows
    @selection_vector = Gandiva::UInt16SelectionVector.new(n_rows)
    @projector = Gandiva::SelectableProjector.new(@schema,
                                                  expressions,
                                                  @selection_vector.mode)
  end

  def test_evaluate
    threshold_node = Gandiva::Int32LiteralNode.new(2)
    greater_than_node =
      Gandiva::FunctionNode.new("greater_than",
                                [@field_node1, threshold_node],
                                Arrow::BooleanDataType.new)
    filter = Gandiva::Filter.new(@schema,
                                 Gandiva::Condition.new(greater_than_node))
    # The filter fills the selection vector that the projector then uses.
    filter.evaluate(@record_batch, @selection_vector)
    projected = @projector.evaluate(@record_batch, @selection_vector)
    sums = [18, 21]
    differences = [-12, -13]
    assert_equal([sums, differences], projected.map(&:values))
  end
end
# diff --git a/src/arrow/c_glib/test/gandiva/test-selection-vector.rb b/src/arrow/c_glib/test/gandiva/test-selection-vector.rb
# new file mode 100644
# index 000000000..ca5042c28
# --- /dev/null
# +++ b/src/arrow/c_glib/test/gandiva/test-selection-vector.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# A freshly created selection vector converts to an empty array of the
# matching unsigned integer type.
class TestGandivaSelectionVector < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
  end

  def test_uint16
    empty_array = build_uint16_array([])
    vector = Gandiva::UInt16SelectionVector.new(10)
    assert_equal(empty_array, vector.to_array)
  end

  def test_uint32
    empty_array = build_uint32_array([])
    vector = Gandiva::UInt32SelectionVector.new(10)
    assert_equal(empty_array, vector.to_array)
  end

  def test_uint64
    empty_array = build_uint64_array([])
    vector = Gandiva::UInt64SelectionVector.new(10)
    assert_equal(empty_array, vector.to_array)
  end
end
# Gandiva::StringLiteralNode keeps its value and reports the string type.
class TestGandivaStringLiteralNode < Test::Unit::TestCase
  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
    @literal = "Hello"
    @literal_node = Gandiva::StringLiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::StringDataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# Gandiva::UInt16LiteralNode keeps its value (the largest 16-bit unsigned
# integer here) and reports the uint16 type.
class TestGandivaUInt16LiteralNode < Test::Unit::TestCase
  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
    @literal = 2 ** 16 - 1
    @literal_node = Gandiva::UInt16LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::UInt16DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# Gandiva::UInt32LiteralNode keeps its value (the largest 32-bit unsigned
# integer here) and reports the uint32 type.
class TestGandivaUInt32LiteralNode < Test::Unit::TestCase
  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
    @literal = 2 ** 32 - 1
    @literal_node = Gandiva::UInt32LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::UInt32DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# Gandiva::UInt64LiteralNode keeps its value and reports the uint64 type.
class TestGandivaUInt64LiteralNode < Test::Unit::TestCase
  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
    # NOTE(review): the other unsigned literal node tests use the largest
    # value of their type; confirm whether the small value is intentional.
    @literal = 3
    @literal_node = Gandiva::UInt64LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::UInt64DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
# Gandiva::UInt8LiteralNode keeps its value (the largest 8-bit unsigned
# integer here) and reports the uint8 type.
class TestGandivaUInt8LiteralNode < Test::Unit::TestCase
  def setup
    unless defined?(::Gandiva)
      omit("Gandiva is required")
    end
    @literal = 2 ** 8 - 1
    @literal_node = Gandiva::UInt8LiteralNode.new(@literal)
  end

  def test_value
    actual = @literal_node.value
    assert_equal(@literal, actual)
  end

  def test_return_type
    expected_type = Arrow::UInt8DataType.new
    assert_equal(expected_type, @literal_node.return_type)
  end
end
module Helper
  # Shortcuts that turn plain Ruby values into Arrow schemas, arrays,
  # tables and record batches.
  module Buildable
    # Builds an Arrow::Schema from [name, data_type] pairs.
    def build_schema(fields)
      Arrow::Schema.new(fields.collect {|name, data_type|
                          Arrow::Field.new(name, data_type)
                        })
    end

    # build_<name>_array(values) for every builder class that is created
    # without arguments. The class is looked up when the method is called.
    {
      "null"         => :NullArrayBuilder,
      "boolean"      => :BooleanArrayBuilder,
      "int"          => :IntArrayBuilder,
      "uint"         => :UIntArrayBuilder,
      "int8"         => :Int8ArrayBuilder,
      "uint8"        => :UInt8ArrayBuilder,
      "int16"        => :Int16ArrayBuilder,
      "uint16"       => :UInt16ArrayBuilder,
      "int32"        => :Int32ArrayBuilder,
      "uint32"       => :UInt32ArrayBuilder,
      "int64"        => :Int64ArrayBuilder,
      "uint64"       => :UInt64ArrayBuilder,
      "float"        => :FloatArrayBuilder,
      "double"       => :DoubleArrayBuilder,
      "date32"       => :Date32ArrayBuilder,
      "date64"       => :Date64ArrayBuilder,
      "binary"       => :BinaryArrayBuilder,
      "large_binary" => :LargeBinaryArrayBuilder,
      "string"       => :StringArrayBuilder,
      "large_string" => :LargeStringArrayBuilder,
    }.each do |name, builder_class_name|
      define_method("build_#{name}_array") do |values|
        build_array(Arrow.const_get(builder_class_name).new, values)
      end
    end

    def build_timestamp_array(unit, values)
      builder =
        Arrow::TimestampArrayBuilder.new(Arrow::TimestampDataType.new(unit))
      build_array(builder, values)
    end

    def build_time32_array(unit, values)
      data_type = Arrow::Time32DataType.new(unit)
      build_array(Arrow::Time32ArrayBuilder.new(data_type), values)
    end

    def build_time64_array(unit, values)
      data_type = Arrow::Time64DataType.new(unit)
      build_array(Arrow::Time64ArrayBuilder.new(data_type), values)
    end

    def build_fixed_size_binary_array(data_type, values)
      builder = Arrow::FixedSizeBinaryArrayBuilder.new(data_type)
      build_array(builder, values)
    end

    # String values are converted to Arrow::Decimal128; anything else is
    # appended as given.
    def build_decimal128_array(value_data_type, values)
      decimals = values.collect do |value|
        value.is_a?(String) ? Arrow::Decimal128.new(value) : value
      end
      builder = Arrow::Decimal128ArrayBuilder.new(value_data_type)
      build_array(builder, decimals)
    end

    def build_list_array(value_data_type, values_list, field_name: "value")
      item_field = Arrow::Field.new(field_name, value_data_type)
      list_builder =
        Arrow::ListArrayBuilder.new(Arrow::ListDataType.new(item_field))
      values_list.each {|values| append_to_builder(list_builder, values)}
      list_builder.finish
    end

    def build_large_list_array(value_data_type, values_list, field_name: "value")
      item_field = Arrow::Field.new(field_name, value_data_type)
      list_builder =
        Arrow::LargeListArrayBuilder.new(Arrow::LargeListDataType.new(item_field))
      values_list.each {|values| append_to_builder(list_builder, values)}
      list_builder.finish
    end

    def build_map_array(key_data_type, item_data_type, maps)
      map_data_type = Arrow::MapDataType.new(key_data_type, item_data_type)
      map_builder = Arrow::MapArrayBuilder.new(map_data_type)
      maps.each {|map| append_to_builder(map_builder, map)}
      map_builder.finish
    end

    def build_struct_array(fields, structs)
      struct_builder =
        Arrow::StructArrayBuilder.new(Arrow::StructDataType.new(fields))
      structs.each {|struct| append_to_builder(struct_builder, struct)}
      struct_builder.finish
    end

    # Appends one value to builder, recursing into the nested builders for
    # map, list and struct values. nil is appended as null.
    def append_to_builder(builder, value)
      return builder.append_null if value.nil?

      data_type = builder.value_data_type
      case data_type
      when Arrow::MapDataType
        builder.append_value
        keys = builder.key_builder
        items = builder.item_builder
        value.each do |key, item|
          append_to_builder(keys, key)
          append_to_builder(items, item)
        end
      when Arrow::ListDataType, Arrow::LargeListDataType
        builder.append_value
        elements = builder.value_builder
        value.each {|element| append_to_builder(elements, element)}
      when Arrow::StructDataType
        builder.append_value
        value.each do |name, field_value|
          index = data_type.get_field_index(name)
          append_to_builder(builder.get_field_builder(index), field_value)
        end
      else
        builder.append_value(value)
      end
    end

    # Builds an Arrow::Table from name => data pairs, where data is an
    # Arrow::Array, a Ruby Array of arrays, or an already chunked array.
    def build_table(columns)
      named_chunked_arrays = columns.collect do |name, data|
        chunked_array =
          case data
          when Arrow::Array
            Arrow::ChunkedArray.new([data])
          when Array
            Arrow::ChunkedArray.new(data)
          else
            data
          end
        [name, chunked_array]
      end
      fields = named_chunked_arrays.collect do |name, chunked_array|
        Arrow::Field.new(name, chunked_array.value_data_type)
      end
      Arrow::Table.new(Arrow::Schema.new(fields),
                       named_chunked_arrays.collect {|_, chunked_array|
                         chunked_array
                       })
    end

    # Builds an Arrow::RecordBatch whose row count is the length of the
    # shortest column (0 when there are no columns).
    def build_record_batch(columns)
      lengths = columns.collect {|_, array| array.length}
      n_rows = lengths.min || 0
      schema = Arrow::Schema.new(columns.collect {|name, array|
                                   Arrow::Field.new(name, array.value_data_type)
                                 })
      Arrow::RecordBatch.new(schema, n_rows, columns.values)
    end

    # Returns a file:// URI for path. An expanded path that doesn't start
    # with "/" gets one extra "/" after the scheme.
    def build_file_uri(path)
      absolute_path = File.expand_path(path)
      scheme = absolute_path.start_with?("/") ? "file://" : "file:///"
      "#{scheme}#{absolute_path}"
    end

    private
    # Appends every value to builder and returns the finished array. Builders
    # that respond to append_string receive values through it.
    def build_array(builder, values)
      values.each do |value|
        next builder.append_null if value.nil?

        if builder.respond_to?(:append_string)
          builder.append_string(value)
        else
          builder.append_value(value)
        end
      end
      builder.finish
    end
  end
end
module Helper
  # <name>_data_type helpers; each call returns a new instance of the
  # corresponding Arrow data type class.
  module DataType
    {
      "boolean" => :BooleanDataType,
      "int8"    => :Int8DataType,
      "int16"   => :Int16DataType,
      "int32"   => :Int32DataType,
      "int64"   => :Int64DataType,
      "uint8"   => :UInt8DataType,
      "uint16"  => :UInt16DataType,
      "uint32"  => :UInt32DataType,
      "uint64"  => :UInt64DataType,
      "string"  => :StringDataType,
      "date64"  => :Date64DataType,
    }.each do |name, data_type_class_name|
      # The class is looked up when the helper is called.
      define_method("#{name}_data_type") do
        Arrow.const_get(data_type_class_name).new
      end
    end
  end
end
module Helper
  module Fixture
    # Joins components onto the "fixture" directory that sits next to this
    # file's directory.
    def fixture_path(*components)
      fixture_directory = [__dir__, "..", "fixture"]
      File.join(fixture_directory + components)
    end
  end
end
require_relative "buildable"
require_relative "data-type"
require_relative "writable"

module Helper
  # Produces the "page-view" sample table together with the Arrow Flight
  # descriptor, ticket, endpoints and info that describe it.
  class FlightInfoGenerator
    include Buildable
    include DataType
    include Writable

    def page_view_table
      columns = {
        "count" => build_uint64_array([1, 2, 3]),
        "private" => build_boolean_array([true, false, true]),
      }
      build_table(columns)
    end

    def page_view_descriptor
      path = ["page-view"]
      ArrowFlight::PathDescriptor.new(path)
    end

    def page_view_ticket
      ArrowFlight::Ticket.new("page-view")
    end

    def page_view_endpoints
      uris = [
        "grpc+tcp://127.0.0.1:10000",
        "grpc+tcp://127.0.0.1:10001",
      ]
      locations = uris.collect {|uri| ArrowFlight::Location.new(uri)}
      [ArrowFlight::Endpoint.new(page_view_ticket, locations)]
    end

    # The table is written in the stream format into a buffer only so that
    # the buffer's size can be passed to ArrowFlight::Info.
    def page_view
      table = page_view_table
      descriptor = page_view_descriptor
      endpoints = page_view_endpoints
      serialized = Arrow::ResizableBuffer.new(0)
      write_table(table, serialized, type: :stream)
      ArrowFlight::Info.new(table.schema,
                            descriptor,
                            endpoints,
                            table.n_rows,
                            serialized.size)
    end
  end
end
require_relative "flight-info-generator"

module Helper
  # ArrowFlight::Server subclass that serves the data produced by
  # FlightInfoGenerator.
  class FlightServer < ArrowFlight::Server
    type_register

    private
    # Always answers with the single "page-view" flight info.
    def virtual_do_list_flights(context, criteria)
      [FlightInfoGenerator.new.page_view]
    end

    # Streams the "page-view" table; any other ticket is rejected.
    def virtual_do_do_get(context, ticket)
      generator = FlightInfoGenerator.new
      if ticket != generator.page_view_ticket
        raise Arrow::Error::Invalid.new("invalid ticket")
      end
      batch_reader = Arrow::TableBatchReader.new(generator.page_view_table)
      ArrowFlight::RecordBatchStream.new(batch_reader)
    end
  end
end
module Helper
  # Version guards that omit the running test when a dependency is older
  # than the requested version.
  module Omittable
    # Omits unless GLib.check_binding_version? accepts the given version.
    def require_gi_bindings(major, minor, micro)
      unless GLib.check_binding_version?(major, minor, micro)
        current_version = GLib::BINDING_VERSION.join(".")
        omit("Require gobject-introspection #{major}.#{minor}.#{micro} or later: " +
             current_version)
      end
    end

    # Omits unless GObjectIntrospection::Version.or_later? accepts the given
    # version.
    def require_gi(major, minor, micro)
      unless GObjectIntrospection::Version.or_later?(major, minor, micro)
        current_version = GObjectIntrospection::Version::STRING
        omit("Require GObject Introspection #{major}.#{minor}.#{micro} or later: " +
             current_version)
      end
    end
  end
end
module Helper
  # Runs a plasma-store-server process that listens on a temporary socket
  # path.
  class PlasmaStore
    # options[:memory_size] is passed to the server's "-m" option
    # (default: 1024 * 1024). The server path is asked from pkg-config.
    def initialize(options={})
      @path = `pkg-config --variable=plasma_store_server plasma`.chomp
      @memory_size = options[:memory_size] || 1024 * 1024
      @socket_file = Tempfile.new(["plasma-store", ".sock"])
      @socket_file.close
      @pid = nil
      # Only the unique path is needed; the server creates the socket itself.
      FileUtils.rm_f(socket_path)
    end

    def socket_path
      @socket_file.path
    end

    # Spawns the server and waits until its socket file exists.
    #
    # Raises RuntimeError when the server exits before creating the socket.
    def start
      @pid = spawn(@path,
                   "-m", @memory_size.to_s,
                   "-s", socket_path)
      until File.exist?(socket_path)
        if Process.waitpid(@pid, Process::WNOHANG)
          # The child is already reaped: forget it so that stop doesn't
          # signal a pid that no longer belongs to us.
          @pid = nil
          raise "Failed to run plasma-store-server: #{@path}"
        end
        # Poll instead of spinning at full speed while the server boots.
        sleep(0.01)
      end
    end

    # Terminates the server: SIGTERM first, SIGKILL when it is still running
    # after one second. Does nothing when no server is running, so it is
    # safe to call more than once.
    def stop
      return if @pid.nil?
      pid = @pid
      @pid = nil
      Process.kill(:TERM, pid)
      timeout = 1
      limit = Time.now + timeout
      while Time.now < limit
        return if Process.waitpid(pid, Process::WNOHANG)
        sleep(0.1)
      end
      Process.kill(:KILL, pid)
      Process.waitpid(pid)
    end
  end
end
module Helper
  module Readable
    # Reads a table from input and yields it to the block.
    #
    # input: an Arrow::Buffer, or a value accepted by
    #   Arrow::FileInputStream.new.
    # type: :file reads with Arrow::RecordBatchFileReader, anything else
    #   with Arrow::RecordBatchStreamReader.
    #
    # The input stream is closed when the block returns or raises.
    def read_table(input, type: :file)
      if input.is_a?(Arrow::Buffer)
        input_stream = Arrow::BufferInputStream.new(input)
      else
        input_stream = Arrow::FileInputStream.new(input)
      end
      begin
        if type == :file
          reader = Arrow::RecordBatchFileReader.new(input_stream)
          record_batches = reader.n_record_batches.times.collect do |i|
            reader.read_record_batch(i)
          end
          # Take the schema from the reader so that a file without any
          # record batch can be read too.
          yield(Arrow::Table.new(reader.schema, record_batches))
        else
          reader = Arrow::RecordBatchStreamReader.new(input_stream)
          begin
            yield(reader.read_all)
          ensure
            reader.close
          end
        end
      ensure
        input_stream.close
      end
    end
  end
end
module Helper
  module Writable
    # Writes table to output.
    #
    # output: an Arrow::Buffer, or a value accepted by
    #   Arrow::FileOutputStream.new.
    # type: :file writes with Arrow::RecordBatchFileWriter, anything else
    #   with Arrow::RecordBatchStreamWriter.
    #
    # The writer is closed before the output stream; both are closed even
    # when writing raises.
    def write_table(table, output, type: :file)
      output_stream =
        if output.is_a?(Arrow::Buffer)
          Arrow::BufferOutputStream.new(output)
        else
          Arrow::FileOutputStream.new(output, false)
        end
      begin
        writer_class =
          case type
          when :file
            Arrow::RecordBatchFileWriter
          else
            Arrow::RecordBatchStreamWriter
          end
        writer = writer_class.new(output_stream, table.schema)
        begin
          writer.write_table(table)
        ensure
          writer.close
        end
      ensure
        output_stream.close
      end
    end
  end
end
# Reads back a two-row, two-column Parquet file that setup writes with one
# row per write_table chunk.
class TestParquetArrowFileReader < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    unless defined?(::Parquet)
      omit("Parquet is required")
    end
    @file = Tempfile.open(["data", ".parquet"])
    @a_array = build_string_array(["foo", "bar"])
    @b_array = build_int32_array([123, 456])
    @table = build_table("a" => @a_array,
                         "b" => @b_array)
    rows_per_chunk = 1
    file_writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
    file_writer.write_table(@table, rows_per_chunk)
    file_writer.close
    @reader = Parquet::ArrowFileReader.new(@file.path)
  end

  def test_schema
    expected = "a: string\n" + "b: int32"
    assert_equal(expected, @reader.schema.to_s)
  end

  sub_test_case("#read_row_group") do
    test("with column indices") do
      expected = build_table("b" => @b_array.slice(0, 1))
      assert_equal(expected, @reader.read_row_group(0, [-1]))
    end

    test("without column indices") do
      expected = build_table("a" => @a_array.slice(1, 1),
                             "b" => @b_array.slice(1, 1))
      assert_equal(expected, @reader.read_row_group(1))
    end
  end

  def test_read_column
    expected = [@a_array, @b_array].collect do |array|
      Arrow::ChunkedArray.new([array])
    end
    actual = [0, -1].collect do |index|
      @reader.read_column_data(index)
    end
    assert_equal(expected, actual)
  end

  def test_n_rows
    n_rows = @reader.n_rows
    assert_equal(2, n_rows)
  end
end
# Writes a boolean table with Parquet::ArrowFileWriter and reads it back.
class TestParquetArrowFileWriter < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    unless defined?(::Parquet)
      omit("Parquet is required")
    end
    @file = Tempfile.open(["data", ".parquet"])
  end

  def test_write
    enabled_values = [true, nil, false, true]
    chunk_size = 2
    table = build_table("enabled" => build_boolean_array(enabled_values))

    file_writer = Parquet::ArrowFileWriter.new(table.schema, @file.path)
    file_writer.write_table(table, chunk_size)
    file_writer.close

    file_reader = Parquet::ArrowFileReader.new(@file.path)
    file_reader.use_threads = true
    expected = [
      enabled_values.length / chunk_size,
      true,
    ]
    actual = [
      file_reader.n_row_groups,
      table.equal_metadata(file_reader.read_table, false),
    ]
    assert_equal(expected, actual)
  end
end
# Setters and getters of Parquet::WriterProperties. "column" is the path
# that gets an explicit setting; "not-specified" never gets one.
class TestParquetWriterProperties < Test::Unit::TestCase
  def setup
    unless defined?(::Parquet)
      omit("Parquet is required")
    end
    @properties = Parquet::WriterProperties.new
  end

  def test_compression
    @properties.set_compression(:gzip)
    actual = @properties.get_compression_path("not-specified")
    assert_equal(Arrow::CompressionType::GZIP, actual)
  end

  def test_compression_with_path
    @properties.set_compression(:gzip, "column")
    expected = [
      Arrow::CompressionType::GZIP,
      Arrow::CompressionType::UNCOMPRESSED,
    ]
    actual = [
      @properties.get_compression_path("column"),
      @properties.get_compression_path("not-specified"),
    ]
    assert_equal(expected, actual)
  end

  def test_enable_dictionary
    @properties.enable_dictionary
    actual = @properties.dictionary_enabled?("not-specified")
    assert_equal(true, actual)
  end

  def test_enable_dictionary_with_path
    @properties.disable_dictionary
    @properties.enable_dictionary("column")
    actual = [
      @properties.dictionary_enabled?("column"),
      @properties.dictionary_enabled?("not-specified"),
    ]
    assert_equal([true, false], actual)
  end

  def test_disable_dictionary
    @properties.disable_dictionary
    actual = @properties.dictionary_enabled?("not-specified")
    assert_equal(false, actual)
  end

  def test_disable_dictionary_with_path
    @properties.enable_dictionary
    @properties.disable_dictionary("column")
    actual = [
      @properties.dictionary_enabled?("column"),
      @properties.dictionary_enabled?("not-specified"),
    ]
    assert_equal([false, true], actual)
  end

  def test_dictionary_page_size_limit
    @properties.dictionary_page_size_limit = 4096
    actual = @properties.dictionary_page_size_limit
    assert_equal(4096, actual)
  end

  def test_batch_size
    @properties.batch_size = 100
    actual = @properties.batch_size
    assert_equal(100, actual)
  end

  def test_data_page_size
    @properties.data_page_size = 128
    actual = @properties.data_page_size
    assert_equal(128, actual)
  end

  def test_max_row_group_length
    @properties.max_row_group_length = 1024
    actual = @properties.max_row_group_length
    assert_equal(1024, actual)
  end
end
# Tests for the Plasma::ClientOptions bindings.
class TestPlasmaClientOptions < Test::Unit::TestCase
  include Helper::Omittable

  # Omits the test when the Plasma bindings are not loaded.
  def setup
    omit("Plasma is required") unless defined?(::Plasma)
    @options = Plasma::ClientOptions.new
  end

  # n_retries is -1 on a new options object and reads back an assigned
  # value afterwards.
  test("n_retries") do
    initial_n_retries = @options.n_retries
    assert_equal(-1, initial_n_retries)

    @options.n_retries = 10
    updated_n_retries = @options.n_retries
    assert_equal(10, updated_n_retries)
  end
end

# diff --git a/src/arrow/c_glib/test/plasma/test-plasma-client.rb b/src/arrow/c_glib/test/plasma/test-plasma-client.rb
# new file mode 100644
# index 000000000..4ff39372b
# --- /dev/null
# +++ b/src/arrow/c_glib/test/plasma/test-plasma-client.rb
# @@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Plasma::Client against a store started for each test.
class TestPlasmaClient < Test::Unit::TestCase
  include Helper::Omittable

  # Starts a Plasma store, connects a client to its socket and prepares the
  # object ID, payload and create options shared by the tests.
  def setup
    # Set before any omission so that teardown can tell whether a store
    # was started.
    @store = nil
    omit("Plasma is required") unless defined?(::Plasma)
    require_gi_bindings(3, 3, 9)
    @store = Helper::PlasmaStore.new
    @store.start
    # The connection options are needed only to construct the client, so
    # they stay local; @options is reserved for the create options that
    # the tests read.
    client_options = Plasma::ClientOptions.new
    @client = Plasma::Client.new(@store.socket_path, client_options)
    @id = Plasma::ObjectID.new("Hello")
    @data = "World"
    @options = Plasma::ClientCreateOptions.new
  end

  # Stops the store when setup got far enough to create one.
  def teardown
    @store.stop if @store
  end

  sub_test_case("#create") do
    # Adds the metadata payload used by the option tests.
    def setup
      super

      @metadata = "Metadata"
    end

    # An object created without options reads back the data written to it.
    test("no options") do
      object = @client.create(@id, @data.bytesize)
      object.data.set_data(0, @data)
      object.seal

      object = @client.refer_object(@id, -1)
      assert_equal(@data, object.data.data.to_s)
    end

    # Metadata passed through the create options is readable from the
    # referred object.
    test("options: metadata") do
      @options.set_metadata(@metadata)
      object = @client.create(@id, 1, @options)
      object.seal

      object = @client.refer_object(@id, -1)
      assert_equal(@metadata, object.metadata.data.to_s)
    end

    # With a GPU device selected, the device number, data and metadata all
    # read back from the referred object.  Omitted without Arrow CUDA.
    test("options: GPU device") do
      omit("Arrow CUDA is required") unless defined?(::ArrowCUDA)

      gpu_device = 0

      @options.gpu_device = gpu_device
      @options.metadata = @metadata
      object = @client.create(@id, @data.bytesize, @options)
      object.data.copy_from_host(@data)
      object.seal

      object = @client.refer_object(@id, -1)
      assert_equal([
                     gpu_device,
                     @data,
                     @metadata,
                   ],
                   [
                     object.gpu_device,
                     object.data.copy_to_host(0, @data.bytesize).to_s,
                     object.metadata.copy_to_host(0, @metadata.bytesize).to_s,
                   ])
    end
  end

  # Creating an object after disconnecting raises Arrow::Error::Io.
  test("#disconnect") do
    @client.disconnect
    assert_raise(Arrow::Error::Io) do
      @client.create(@id, @data.bytesize, @options)
    end
  end
end

# diff --git a/src/arrow/c_glib/test/plasma/test-plasma-created-object.rb b/src/arrow/c_glib/test/plasma/test-plasma-created-object.rb
# new file mode 100644
# index 000000000..8d036cda8
# --- /dev/null
# +++ b/src/arrow/c_glib/test/plasma/test-plasma-created-object.rb
# @@ -0,0 +1,59 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the object returned by Plasma::Client#create before it is
# sealed.
class TestPlasmaCreatedObject < Test::Unit::TestCase
  include Helper::Omittable

  # Starts a store, connects a client and creates the object under test
  # with metadata attached through the create options.
  def setup
    @store = nil
    omit("Plasma is required") unless defined?(::Plasma)
    require_gi_bindings(3, 3, 9)
    @store = Helper::PlasmaStore.new
    @store.start
    @client = Plasma::Client.new(@store.socket_path, nil)

    @id = Plasma::ObjectID.new("Hello")
    @data = "World"
    @metadata = "Metadata"
    @options = Plasma::ClientCreateOptions.new
    @options.metadata = @metadata
    @object = @client.create(@id, @data.bytesize, @options)
  end

  # Stops the store; nothing to do when setup never created one.
  def teardown
    return unless @store

    @store.stop
  end

  # After sealing, the object referred to by the same ID exposes the data
  # that was written.
  test("#seal") do
    @object.data.set_data(0, @data)
    @object.seal

    referred = @client.refer_object(@id, -1)
    assert_equal(@data, referred.data.data.to_s)
  end

  # Creating the same ID again raises AlreadyExists while the first object
  # is pending; after abort the ID can be created again.
  test("#abort") do
    @object.data.set_data(0, @data)
    assert_raise(Arrow::Error::AlreadyExists) {
      @client.create(@id, @data.bytesize, @options)
    }
    @object.abort

    recreated = @client.create(@id, @data.bytesize, @options)
    recreated.abort
  end
end

# diff --git a/src/arrow/c_glib/test/plasma/test-plasma-referred-object.rb b/src/arrow/c_glib/test/plasma/test-plasma-referred-object.rb
# new file mode 100644
# index 000000000..ef4c72aee
# --- /dev/null
# +++
# b/src/arrow/c_glib/test/plasma/test-plasma-referred-object.rb
# @@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the object returned by Plasma::Client#refer_object.
class TestPlasmaReferredObject < Test::Unit::TestCase
  include Helper::Omittable

  # Starts a store, connects a client, creates and seals an object, and
  # keeps the referred view of it as the object under test.
  def setup
    @store = nil
    omit("Plasma is required") unless defined?(::Plasma)
    require_gi_bindings(3, 3, 9)
    @store = Helper::PlasmaStore.new
    @store.start
    @client = Plasma::Client.new(@store.socket_path, nil)

    @id = Plasma::ObjectID.new("Hello")
    @data = "World"
    @metadata = "Metadata"
    @options = Plasma::ClientCreateOptions.new
    @options.metadata = @metadata
    created = @client.create(@id, @data.bytesize, @options)
    created.data.set_data(0, @data)
    created.seal
    @object = @client.refer_object(@id, -1)
  end

  # Stops the store; nothing to do when setup never created one.
  def teardown
    return unless @store

    @store.stop
  end

  # The first release succeeds; releasing again raises
  # Arrow::Error::Invalid with a message that names the object ID.
  test("#release") do
    @object.release

    tag = "[plasma][referred-object][release]: "
    detail = "Can't process released object: <#{@id.to_hex}>"
    expected_error = Arrow::Error::Invalid.new(tag + detail)
    assert_raise(expected_error) { @object.release }
  end
end

# diff --git a/src/arrow/c_glib/test/run-test.rb b/src/arrow/c_glib/test/run-test.rb
# new file mode 100755
# index 000000000..621c78c39
# --- /dev/null
# +++ b/src/arrow/c_glib/test/run-test.rb
# @@ -0,0
# +1,104 @@
#!/usr/bin/env ruby
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Entry point of the c_glib test suite: loads the GObject Introspection
# bindings that are available, loads the test helpers, and runs every test
# found under the "test" directory.

require "pathname"
require "test-unit"

# ARROW_DLL_PATH lists extra DLL directories separated by
# File::PATH_SEPARATOR.  RubyInstaller::Runtime is referenced only when it
# is defined, so a set ARROW_DLL_PATH does not raise NameError on Rubies
# that do not provide that constant.
if defined?(RubyInstaller::Runtime)
  ENV.fetch("ARROW_DLL_PATH", "").split(File::PATH_SEPARATOR).each do |path|
    RubyInstaller::Runtime.add_dll_directory(path)
  end
end

base_dir = Pathname(__dir__).parent
test_dir = base_dir + "test"

require "gi"

# Gio and Arrow are loaded unconditionally; a missing typelib aborts the run.
Gio = GI.load("Gio")
Arrow = GI.load("Arrow")
module Arrow
  class Buffer
    alias_method :initialize_raw, :initialize
    # Wraps the generated constructor and additionally stores +data+ on the
    # instance.
    # NOTE(review): presumably this keeps the Ruby object referenced for as
    # long as the buffer lives -- confirm.
    def initialize(data)
      initialize_raw(data)
      @data = data
    end
  end

  class BooleanScalar
    # Lets tests call #value on boolean scalars like on other scalars.
    alias_method :value, :value?
  end
end

# Every module below is optional: when its typelib is not found the
# constant stays undefined, which the tests check with defined?(...).
begin
  ArrowCUDA = GI.load("ArrowCUDA")
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

begin
  ArrowDataset = GI.load("ArrowDataset")
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

begin
  # ArrowFlight is loaded through a dedicated loader whose
  # should_unlock_gvl? answers true for every function.
  class ArrowFlightLoader < GI::Loader
    def should_unlock_gvl?(info, klass)
      true
    end
  end
  flight_module = Module.new
  ArrowFlightLoader.load("ArrowFlight", flight_module)
  ArrowFlight = flight_module
  GObjectIntrospection::Loader.start_callback_dispatch_thread
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

begin
  Gandiva = GI.load("Gandiva")
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

begin
  Parquet = GI.load("Parquet")
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

begin
  Plasma = GI.load("Plasma")
rescue GObjectIntrospection::RepositoryError::TypelibNotFound
end

require "fileutils"
require "find"
require "rbconfig"
require "stringio"
require "tempfile"
require "zlib"
require_relative "helper/buildable"
require_relative "helper/data-type"
require_relative "helper/fixture"
# The Flight helpers are loaded only when the ArrowFlight bindings were.
if defined?(ArrowFlight)
  require_relative "helper/flight-info-generator"
  require_relative "helper/flight-server"
end
require_relative "helper/omittable"
require_relative "helper/plasma-store"
require_relative "helper/readable"
require_relative "helper/writable"

# The runner's result becomes the process exit status.
exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))

# diff --git a/src/arrow/c_glib/test/run-test.sh b/src/arrow/c_glib/test/run-test.sh
# new file mode 100755
# index 000000000..c3565d272
# --- /dev/null
# +++ b/src/arrow/c_glib/test/run-test.sh
# @@ -0,0 +1,59 @@
#!/usr/bin/env sh
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the c_glib test suite from a build directory: puts the freshly built
# libraries and typelibs on the search paths, rebuilds unless BUILD=no, and
# starts run-test.rb (under ${GDB} when that variable is set).

# Quoted so that a checkout path containing spaces still resolves.
test_dir="$(cd "$(dirname "$0")"; pwd)"
build_dir="$(pwd)"

modules="arrow-glib arrow-cuda-glib arrow-dataset-glib arrow-flight-glib gandiva-glib parquet-glib plasma-glib"

# Prepend each module's library directory to LD_LIBRARY_PATH, preferring
# the libtool ".libs" directory over the module build directory itself.
# ${VAR:+:${VAR}} appends the old value only when it is non-empty, so an
# unset variable does not leave a trailing ":" (an empty search entry).
for module in ${modules}; do
  module_build_dir="${build_dir}/${module}"
  libtool_dir="${module_build_dir}/.libs"
  if [ -d "${libtool_dir}" ]; then
    LD_LIBRARY_PATH="${libtool_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  else
    if [ -d "${module_build_dir}" ]; then
      LD_LIBRARY_PATH="${module_build_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    fi
  fi
done
export LD_LIBRARY_PATH

# Rebuild with whichever build system configured this directory; a failed
# build aborts with the build tool's exit status.
if [ "${BUILD}" != "no" ]; then
  if [ -f "Makefile" ]; then
    make -j8 > /dev/null || exit $?
  elif [ -f "build.ninja" ]; then
    ninja || exit $?
  fi
fi

# Prepend each module's typelib directory to GI_TYPELIB_PATH.  A variable
# named after the module (for example ARROW_GLIB_TYPELIB_DIR) overrides the
# default location inside the build directory.
for module in ${modules}; do
  MODULE_TYPELIB_DIR_VAR_NAME="$(echo "${module}" | tr 'a-z-' 'A-Z_')_TYPELIB_DIR"
  module_typelib_dir=$(eval "echo \${${MODULE_TYPELIB_DIR_VAR_NAME}}")
  if [ -z "${module_typelib_dir}" ]; then
    module_typelib_dir="${build_dir}/${module}"
  fi

  if [ -d "${module_typelib_dir}" ]; then
    GI_TYPELIB_PATH="${module_typelib_dir}${GI_TYPELIB_PATH:+:${GI_TYPELIB_PATH}}"
  fi
done
export GI_TYPELIB_PATH

# ${GDB} is deliberately unquoted: it may be empty or hold a command plus
# its arguments, and must be split into words.
${GDB} ruby "${test_dir}/run-test.rb" "$@"

# diff --git a/src/arrow/c_glib/test/test-array-builder.rb b/src/arrow/c_glib/test/test-array-builder.rb
# new file mode 100644
# index 000000000..6bece6367
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-array-builder.rb
# @@ -0,0 +1,1944 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +module ArrayBuilderAppendValueBytesTests + def test_append + builder = create_builder + value = "\x00\xff" + builder.append_value_bytes(GLib::Bytes.new(value)) + assert_equal(build_array([value]), + builder.finish) + end +end + +module ArrayBuilderAppendValuesTests + def test_empty + builder = create_builder + builder.append_values([]) + assert_equal(build_array([]), + builder.finish) + end + + def test_values_only + builder = create_builder + builder.append_values(sample_values) + assert_equal(build_array(sample_values), + builder.finish) + end + + def test_with_is_valids + builder = create_builder + builder.append_values(sample_values, [true, true, false]) + actual_sample_values = sample_values.dup + actual_sample_values[2] = nil + assert_equal(build_array(actual_sample_values), + builder.finish) + end + + def test_with_large_is_valids + builder = create_builder + n = 10000 + large_sample_values = sample_values * n + large_is_valids = [true, true, false] * n + builder.append_values(large_sample_values, large_is_valids) + actual_sample_values = sample_values.dup + actual_sample_values[2] = nil + actual_large_sample_values = actual_sample_values * n + assert_equal(build_array(actual_large_sample_values), + builder.finish) + end + + def test_mismatch_length + builder = create_builder + message = "[#{builder_class_name}][append-values]: " + + "values length and is_valids length must be equal: <3> != <2>" + assert_raise(Arrow::Error::Invalid.new(message)) do + builder.append_values(sample_values, [true, true]) + end + end +end + +module ArrayBuilderAppendValuesWithNullTests + def test_values_only + builder = create_builder + builder.append_values(sample_values_with_null) + assert_equal(build_array(sample_values_with_null), + builder.finish) + end + + def test_large_values_only + builder = create_builder + n = 10000 + large_sample_values_with_null = sample_values_with_null * n + builder.append_values(large_sample_values_with_null) + 
assert_equal(build_array(large_sample_values_with_null), + builder.finish) + end + + def test_with_is_valids + builder = create_builder + builder.append_values(sample_values_with_null, [true, true, false]) + actual_sample_values = sample_values_with_null.dup + actual_sample_values[2] = nil + assert_equal(build_array(actual_sample_values), + builder.finish) + end + + def test_with_large_is_valids + builder = create_builder + n = 10000 + large_sample_values = sample_values_with_null * n + large_is_valids = [true, true, false] * n + builder.append_values(large_sample_values, large_is_valids) + actual_sample_values = sample_values_with_null.dup + actual_sample_values[2] = nil + actual_large_sample_values = actual_sample_values * n + assert_equal(build_array(actual_large_sample_values), + builder.finish) + end +end + +module ArrayBuilderAppendValuesPackedTests + def test_empty + builder = create_builder + builder.append_values_packed("") + assert_equal(build_array([]), + builder.finish) + end + + def test_values_only + builder = create_builder + builder.append_values_packed(pack_values(sample_values)) + assert_equal(build_array(sample_values), + builder.finish) + end + + def test_with_is_valids + builder = create_builder + builder.append_values_packed(pack_values(sample_values), + [true, true, false]) + sample_values_with_null = sample_values + sample_values_with_null[2] = nil + assert_equal(build_array(sample_values_with_null), + builder.finish) + end + + def test_with_large_is_valids + builder = create_builder + n = 10000 + large_sample_values = sample_values * n + large_is_valids = [true, true, false] * n + builder.append_values_packed(pack_values(large_sample_values), + large_is_valids) + sample_values_with_null = sample_values + sample_values_with_null[2] = nil + large_sample_values_with_null = sample_values_with_null * n + assert_equal(build_array(large_sample_values_with_null), + builder.finish) + end + + def test_mismatch_length + builder = create_builder + 
message = "[fixed-size-binary-array-builder][append-values-packed]: " + + "the number of values and is_valids length must be equal: <3> != <2>" + assert_raise(Arrow::Error::Invalid.new(message)) do + builder.append_values_packed(pack_values(sample_values), + [true, true]) + end + end +end + +module ArrayBuilderAppendStringsTests + def test_empty + builder = create_builder + builder.append_strings([]) + assert_equal(build_array([]), + builder.finish) + end + + def test_strings_only + builder = create_builder + builder.append_strings(sample_values) + assert_equal(build_array(sample_values), + builder.finish) + end + + def test_with_is_valids + builder = create_builder + builder.append_strings(sample_values, [true, true, false]) + sample_values_with_null = sample_values + sample_values_with_null[2] = nil + assert_equal(build_array(sample_values_with_null), + builder.finish) + end + + def test_with_large_is_valids + builder = create_builder + n = 10000 + large_sample_values = sample_values * n + large_is_valids = [true, true, false] * n + builder.append_strings(large_sample_values, large_is_valids) + sample_values_with_null = sample_values + sample_values_with_null[2] = nil + large_sample_values_with_null = sample_values_with_null * n + assert_equal(build_array(large_sample_values_with_null), + builder.finish) + end + + def test_mismatch_length + builder = create_builder + message = "[#{builder_class_name}][append-strings]: " + + "values length and is_valids length must be equal: <3> != <2>" + assert_raise(Arrow::Error::Invalid.new(message)) do + builder.append_strings(sample_values, [true, true]) + end + end +end + +module ArrayBuilderAppendNullsTests + def test_zero + builder = create_builder + builder.append_nulls(0) + assert_equal(build_array([]), + builder.finish) + end + + def test_positive + builder = create_builder + builder.append_nulls(3) + assert_equal(build_array([nil, nil, nil]), + builder.finish) + end + + def test_negative + builder = create_builder + 
message = "[array-builder][append-nulls]: " + + "the number of nulls must be 0 or larger: <-1>" + assert_raise(Arrow::Error::Invalid.new(message)) do + builder.append_nulls(-1) + end + end +end + +module ArrayBuilderAppendEmptyValueTests + def test_append + builder = create_builder + builder.append_empty_value + assert_equal(build_array([empty_value]), + builder.finish) + end +end + +module ArrayBuilderAppendEmptyValuesTests + def test_zero + builder = create_builder + builder.append_empty_values(0) + assert_equal(build_array([]), + builder.finish) + end + + def test_positive + builder = create_builder + builder.append_empty_values(3) + assert_equal(build_array([empty_value] * 3), + builder.finish) + end + + def test_negative + builder = create_builder + message = "[array-builder][append-empty-values]: " + + "the number of empty values must be 0 or larger: <-1>" + assert_raise(Arrow::Error::Invalid.new(message)) do + builder.append_empty_values(-1) + end + end +end + +module ArrayBuilderValueTypeTests + def test_value_data_type + assert_equal(value_data_type, + build_array(sample_values).value_data_type) + end + + def test_value_type + assert_equal(value_data_type.id, + build_array(sample_values).value_type) + end +end + +module ArrayBuilderCapacityControlTests + def test_resize + builder = create_builder + before_capacity = builder.capacity + builder.resize(before_capacity + 100) + after_capacity = builder.capacity + + assert do + after_capacity >= before_capacity + 100 + end + end + + def test_reserve + builder = create_builder + before_capacity = builder.capacity + builder.reserve(100) + after_capacity = builder.capacity + + assert do + after_capacity >= before_capacity + 100 + end + end +end + +module ArrayBuilderLengthTests + def test_length + builder = create_builder + sample_values_with_null = sample_values + sample_values_with_null[2, 0] = nil + lengths = [builder.length] + sample_values_with_null.each do |value| + if value.nil? 
+ builder.append_null + else + builder.append_value(value) + end + lengths << builder.length + end + expected_lengths = [*0 ... (sample_values_with_null.length+1)] + assert_equal(expected_lengths, + lengths) + end +end + +module ArrayBuilderNNullsTests + def test_n_nulls + builder = create_builder + sample_values_with_null = sample_values + sample_values_with_null[2, 0] = nil + null_counts = [builder.n_nulls] + sample_values_with_null.each do |value| + if value.nil? + builder.append_null + else + builder.append_value(value) + end + null_counts << builder.n_nulls + end + expected_null_counts = [0, 0, 0] + [1] * (sample_values_with_null.length - 2) + assert_equal(expected_null_counts, + null_counts) + end +end + +class TestArrayBuilder < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def setup + require_gi_bindings(3, 1, 9) + end + + def build_array(values) + super(create_builder, values) + end + + sub_test_case("NullArrayBuilder") do + def create_builder + Arrow::NullArrayBuilder.new + end + + def value_data_type + Arrow::NullDataType.new + end + + def builder_class_name + "null-array-builder" + end + + def sample_values + [nil, nil, nil] + end + + def empty_value + nil + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + test("#append_null") do + builder = create_builder + builder.append_null + assert_equal(build_array([nil]), + builder.finish) + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + test("#length") do + builder = create_builder + before = builder.length + builder.append_null + after = builder.length + assert_equal(1, + after - before) + end + + test("#n_nulls") do + 
builder = create_builder + before = builder.length + builder.append_null + after = builder.length + assert_equal(1, + after - before) + end + end + + sub_test_case("BooleanArrayBuilder") do + def create_builder + Arrow::BooleanArrayBuilder.new + end + + def value_data_type + Arrow::BooleanDataType.new + end + + def builder_class_name + "boolean-array-builder" + end + + def sample_values + [true, false, true] + end + + def empty_value + false + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("IntArrayBuilder") do + def create_builder + Arrow::IntArrayBuilder.new + end + + def value_data_type + Arrow::Int8DataType.new + end + + def builder_class_name + "int-array-builder" + end + + def sample_values + [1, -2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests 
+ end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("UIntArrayBuilder") do + def create_builder + Arrow::UIntArrayBuilder.new + end + + def value_data_type + Arrow::UInt8DataType.new + end + + def builder_class_name + "uint-array-builder" + end + + def sample_values + [1, 2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Int8ArrayBuilder") do + def create_builder + Arrow::Int8ArrayBuilder.new + end + + def value_data_type + Arrow::Int8DataType.new + end + + def builder_class_name + "int8-array-builder" + end + + def sample_values + [1, -2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + 
end + + sub_test_case("UInt8ArrayBuilder") do + def create_builder + Arrow::UInt8ArrayBuilder.new + end + + def value_data_type + Arrow::UInt8DataType.new + end + + def builder_class_name + "uint8-array-builder" + end + + def sample_values + [1, 2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Int16ArrayBuilder") do + def create_builder + Arrow::Int16ArrayBuilder.new + end + + def value_data_type + Arrow::Int16DataType.new + end + + def builder_class_name + "int16-array-builder" + end + + def sample_values + [1, -2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("UInt16ArrayBuilder") do + def create_builder + 
Arrow::UInt16ArrayBuilder.new + end + + def value_data_type + Arrow::UInt16DataType.new + end + + def builder_class_name + "uint16-array-builder" + end + + def sample_values + [1, 2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Int32ArrayBuilder") do + def create_builder + Arrow::Int32ArrayBuilder.new + end + + def value_data_type + Arrow::Int32DataType.new + end + + def builder_class_name + "int32-array-builder" + end + + def sample_values + [1, -2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("UInt32ArrayBuilder") do + def create_builder + Arrow::UInt32ArrayBuilder.new + end + + def value_data_type + 
Arrow::UInt32DataType.new + end + + def builder_class_name + "uint32-array-builder" + end + + def sample_values + [1, 2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Int64ArrayBuilder") do + def create_builder + Arrow::Int64ArrayBuilder.new + end + + def value_data_type + Arrow::Int64DataType.new + end + + def builder_class_name + "int64-array-builder" + end + + def sample_values + [1, -2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("UInt64ArrayBuilder") do + def create_builder + Arrow::UInt64ArrayBuilder.new + end + + def value_data_type + Arrow::UInt64DataType.new + end + + def builder_class_name + 
"uint64-array-builder" + end + + def sample_values + [1, 2, 3] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("FloatArrayBuilder") do + def create_builder + Arrow::FloatArrayBuilder.new + end + + def value_data_type + Arrow::FloatDataType.new + end + + def builder_class_name + "float-array-builder" + end + + def sample_values + [1.1, -2.2, 3.3] + end + + def empty_value + 0.0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("DoubleArrayBuilder") do + def create_builder + Arrow::DoubleArrayBuilder.new + end + + def value_data_type + Arrow::DoubleDataType.new + end + + def builder_class_name + "double-array-builder" + end + + def sample_values + [1.1, -2.2, 
3.3] + end + + def empty_value + 0.0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Date32ArrayBuilder") do + def create_builder + Arrow::Date32ArrayBuilder.new + end + + def value_data_type + Arrow::Date32DataType.new + end + + def builder_class_name + "date32-array-builder" + end + + def sample_values + [ + 0, # epoch + 17406, # 2017-08-28 + 17427, # 2017-09-18 + ] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Date64ArrayBuilder") do + def create_builder + Arrow::Date64ArrayBuilder.new + end + + def value_data_type + Arrow::Date64DataType.new + end + + def builder_class_name + "date64-array-builder" + end + + def sample_values + [ + -315619200, # 
1960-01-01T00:00:00Z + 0, # epoch + 1503878400000, # 2017-08-28T00:00:00Z + ] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("TimestampArrayBuilder") do + def create_builder + data_type = Arrow::TimestampDataType.new(:milli) + Arrow::TimestampArrayBuilder.new(data_type) + end + + def value_data_type + Arrow::TimestampDataType.new(:milli) + end + + def builder_class_name + "timestamp-array-builder" + end + + def sample_values + [ + 0, # epoch + 1504953190854, # 2017-09-09T10:33:10.854Z + 1505660812942, # 2017-09-17T15:06:52.942Z + ] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Time32ArrayBuilder") do + def create_builder 
+ data_type = Arrow::Time32DataType.new(:second) + Arrow::Time32ArrayBuilder.new(data_type) + end + + def value_data_type + Arrow::Time32DataType.new(:second) + end + + def builder_class_name + "time32-array-builder" + end + + def sample_values + [ + 0, # midnight + 60 * 10, # 00:10:00 + 60 * 60 * 2 + 30, # 02:00:30 + ] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Time64ArrayBuilder") do + def create_builder + data_type = Arrow::Time64DataType.new(:micro) + Arrow::Time64ArrayBuilder.new(data_type) + end + + def value_data_type + Arrow::Time64DataType.new(:micro) + end + + def builder_class_name + "time64-array-builder" + end + + def sample_values + [ + 0, # midnight + 60 * 10 * 1000 * 1000, # 00:10:00.000000 + (60 * 60 * 2 + 30) * 1000 * 1000, # 02:00:30.000000 + ] + end + + def empty_value + 0 + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include 
ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("BinaryArrayBuilder") do + def create_builder + Arrow::BinaryArrayBuilder.new + end + + def value_data_type + Arrow::BinaryDataType.new + end + + def builder_class_name + "binary-array-builder" + end + + def sample_values + [ + "\x00\x01", + "\xfe\xff", + "", + ] + end + + def empty_value + "" + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_value_bytes") do + include ArrayBuilderAppendValueBytesTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + + def setup + require_gi_bindings(3, 4, 1) + end + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("LargeBinaryArrayBuilder") do + def create_builder + Arrow::LargeBinaryArrayBuilder.new + end + + def value_data_type + Arrow::LargeBinaryDataType.new + end + + def builder_class_name + "large-binary-array-builder" + end + + def sample_values + [ + "\x00\x01", + "\xfe\xff", + "", + ] + end + + def empty_value + "" + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_value_bytes") do + include ArrayBuilderAppendValueBytesTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + + def setup + require_gi_bindings(3, 4, 1) + end + end + + 
sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("StringArrayBuilder") do + def create_builder + Arrow::StringArrayBuilder.new + end + + def value_data_type + Arrow::StringDataType.new + end + + def builder_class_name + "string-array-builder" + end + + def sample_values + [ + "hello", + "world!!", + "", + ] + end + + def empty_value + "" + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + + def setup + require_gi_bindings(3, 4, 1) + end + + def builder_class_name + "binary-array-builder" + end + end + + sub_test_case("#append_strings") do + include ArrayBuilderAppendStringsTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("LargeStringArrayBuilder") do + def create_builder + Arrow::LargeStringArrayBuilder.new + end + + def value_data_type + Arrow::LargeStringDataType.new + end + + def builder_class_name + "large-string-array-builder" + end + + def sample_values + [ + "hello", + "world!!", + "", + ] + 
end + + def empty_value + "" + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + + def setup + require_gi_bindings(3, 4, 1) + end + + def builder_class_name + "large-binary-array-builder" + end + end + + sub_test_case("#append_strings") do + include ArrayBuilderAppendStringsTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("FixedSizeBinaryArrayBuilder") do + def create_builder + Arrow::FixedSizeBinaryArrayBuilder.new(value_data_type) + end + + def value_data_type + Arrow::FixedSizeBinaryDataType.new(4) + end + + def builder_class_name + "fixed-size-binary-array-builder" + end + + def sample_values + [ + "0123", + "abcd", + "\x0\x0\x0\x0".b, + ] + end + + def sample_values_with_null + [ + "0123", + nil, + "\x0\x0\x0\x0".b, + ] + end + + def pack_values(values) + values.join("") + end + + def empty_value + "\x0\x0\x0\x0" + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_value") do + test("nil") do + builder = create_builder + builder.append_value(nil) + assert_equal(build_array([nil]), + builder.finish) + end + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + include ArrayBuilderAppendValuesWithNullTests + end + + sub_test_case("#append_values_packed") do + include ArrayBuilderAppendValuesPackedTests + end + + sub_test_case("#append_nulls") do + include 
ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Decimal128ArrayBuilder") do + def create_builder + Arrow::Decimal128ArrayBuilder.new(value_data_type) + end + + def value_data_type + Arrow::Decimal128DataType.new(8, 2) + end + + def builder_class_name + "decimal128-array-builder" + end + + def sample_values + [ + Arrow::Decimal128.new("23423445"), + Arrow::Decimal128.new("00012345"), + Arrow::Decimal128.new("00000000"), + ] + end + + def sample_values_with_null + [ + Arrow::Decimal128.new("23423445"), + nil, + Arrow::Decimal128.new("00000000"), + ] + end + + def pack_values(values) + values.collect(&:to_bytes).collect(&:to_s).join("") + end + + def empty_value + Arrow::Decimal128.new("0") + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_value") do + test("nil") do + builder = create_builder + builder.append_value(nil) + assert_equal(build_array([nil]), + builder.finish) + end + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + include ArrayBuilderAppendValuesWithNullTests + end + + sub_test_case("#append_values_packed") do + include ArrayBuilderAppendValuesPackedTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + 
sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end + + sub_test_case("Decimal256ArrayBuilder") do + def create_builder + Arrow::Decimal256ArrayBuilder.new(value_data_type) + end + + def value_data_type + Arrow::Decimal256DataType.new(38, 2) + end + + def builder_class_name + "decimal256-array-builder" + end + + def sample_values + [ + Arrow::Decimal256.new("23423445"), + Arrow::Decimal256.new("00012345"), + Arrow::Decimal256.new("00000000"), + ] + end + + def sample_values_with_null + [ + Arrow::Decimal256.new("23423445"), + nil, + Arrow::Decimal256.new("00000000"), + ] + end + + def pack_values(values) + values.collect(&:to_bytes).collect(&:to_s).join("") + end + + def empty_value + Arrow::Decimal256.new("0") + end + + sub_test_case("value type") do + include ArrayBuilderValueTypeTests + end + + sub_test_case("#append_value") do + test("nil") do + builder = create_builder + builder.append_value(nil) + assert_equal(build_array([nil]), + builder.finish) + end + end + + sub_test_case("#append_values") do + include ArrayBuilderAppendValuesTests + include ArrayBuilderAppendValuesWithNullTests + end + + sub_test_case("#append_values_packed") do + include ArrayBuilderAppendValuesPackedTests + end + + sub_test_case("#append_nulls") do + include ArrayBuilderAppendNullsTests + end + + sub_test_case("#append_empty_value") do + include ArrayBuilderAppendEmptyValueTests + end + + sub_test_case("#append_empty_values") do + include ArrayBuilderAppendEmptyValuesTests + end + + sub_test_case("capacity control") do + include ArrayBuilderCapacityControlTests + end + + sub_test_case("#length") do + include ArrayBuilderLengthTests + end + + sub_test_case("#n_nulls") do + include ArrayBuilderNNullsTests + end + end +end diff --git a/src/arrow/c_glib/test/test-array-datum.rb b/src/arrow/c_glib/test/test-array-datum.rb new file mode 100644 index 000000000..623e5589c --- /dev/null +++ 
# b/src/arrow/c_glib/test/test-array-datum.rb @@ -0,0 +1,70 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ArrayDatum wrapping a two-element boolean array:
# kind predicates, equality, string form and access to the wrapped value.
class TestArrayDatum < Test::Unit::TestCase
  include Helper::Buildable

  # Fixture: a [true, false] boolean array and the datum created from it.
  def setup
    @array = build_boolean_array([true, false])
    @datum = Arrow::ArrayDatum.new(@array)
  end

  def test_array?
    assert { @datum.array? }
  end

  def test_array_like?
    assert { @datum.array_like? }
  end

  # An array datum must not report itself as a scalar.
  def test_scalar?
    assert { !@datum.scalar? }
  end

  def test_value?
    assert { @datum.value? }
  end

  sub_test_case("==") do
    # Two datums created from the same array compare equal.
    def test_true
      lhs = Arrow::ArrayDatum.new(@array)
      rhs = Arrow::ArrayDatum.new(@array)
      assert_equal(lhs, rhs)
    end

    # A table datum is not equal to the array datum, even though the
    # table is built from the very same array.
    def test_false
      table = build_table("visible" => @array)
      table_datum = Arrow::TableDatum.new(table)
      assert_not_equal(@datum, table_datum)
    end
  end

  def test_to_string
    assert_equal("Array", @datum.to_s)
  end

  # The datum hands back the array it was created from.
  def test_value
    assert_equal(@array, @datum.value)
  end
end

# diff --git a/src/arrow/c_glib/test/test-array-sort-options.rb b/src/arrow/c_glib/test/test-array-sort-options.rb
# new file mode 100644
# index 000000000..afb6a7025
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-array-sort-options.rb
# @@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ArraySortOptions: construction from a sort-order
# symbol and value equality.
class TestArraySortOptions < Test::Unit::TestCase
  include Helper::Buildable

  # Building with :ascending must expose the ASCENDING sort order.
  def test_new
    sort_options = Arrow::ArraySortOptions.new(:ascending)
    actual_order = sort_options.order
    assert_equal(Arrow::SortOrder::ASCENDING, actual_order)
  end

  # Two option objects created with the same order compare equal.
  def test_equal
    lhs = Arrow::ArraySortOptions.new(:descending)
    rhs = Arrow::ArraySortOptions.new(:descending)
    assert_equal(lhs, rhs)
  end
end

# diff --git a/src/arrow/c_glib/test/test-array.rb b/src/arrow/c_glib/test/test-array.rb
# new file mode 100644
# index 000000000..c03aecf17
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-array.rb
# @@ -0,0 +1,188 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the generic Arrow::Array API: equality variants, null/valid
# inspection, metadata accessors, slicing, string rendering, #view,
# #diff_unified and #concatenate.
class TestArray < Test::Unit::TestCase
  include Helper::Buildable

  def test_equal
    lhs = build_boolean_array([true, false])
    rhs = build_boolean_array([true, false])
    assert_equal(lhs, rhs)
  end

  # The second element differs by ten machine epsilons only.
  def test_equal_approx
    perturbed = build_double_array([1.1, 2.2 + Float::EPSILON * 10])
    exact = build_double_array([1.1, 2.2])
    assert { perturbed.equal_approx(exact) }
  end

  # Index 1 of the first array and index 4 of the second both start the
  # run 2, 3, 4.
  def test_equal_range
    first = build_int32_array([1, 2, 3, 4, 5])
    second = build_int32_array([-2, -1, 0, 1, 2, 3, 4, 999])
    assert { first.equal_range(1, second, 4, 3) }
  end

  def test_is_null
    array = finish_boolean_builder do |builder|
      builder.append_null
      builder.append_value(true)
    end
    null_flags = (0...array.length).map { |i| array.null?(i) }
    assert_equal([true, false], null_flags)
  end

  def test_is_valid
    array = finish_boolean_builder do |builder|
      builder.append_null
      builder.append_value(true)
    end
    valid_flags = (0...array.length).map { |i| array.valid?(i) }
    assert_equal([false, true], valid_flags)
  end

  def test_length
    array = finish_boolean_builder do |builder|
      builder.append_value(true)
    end
    assert_equal(1, array.length)
  end

  def test_n_nulls
    array = finish_boolean_builder do |builder|
      builder.append_null
      builder.append_null
    end
    assert_equal(2, array.n_nulls)
  end

  # Slots are appended as null, value, value, null, value; the expected
  # byte 0b10110 has a set bit for every non-null slot, lowest bit first.
  def test_null_bitmap
    array = finish_boolean_builder do |builder|
      builder.append_null
      builder.append_value(true)
      builder.append_value(false)
      builder.append_null
      builder.append_value(false)
    end
    first_byte = array.null_bitmap.data.to_s.unpack("c*").first
    assert_equal(0b10110, first_byte)
  end

  def test_value_data_type
    array = finish_boolean_builder
    assert_equal(Arrow::BooleanDataType.new, array.value_data_type)
  end

  def test_value_type
    array = finish_boolean_builder
    assert_equal(Arrow::Type::BOOLEAN, array.value_type)
  end

  # slice(1, 2) of [true, false, true] must yield [false, true].
  def test_slice
    array = finish_boolean_builder do |builder|
      builder.append_value(true)
      builder.append_value(false)
      builder.append_value(true)
    end
    sub_array = array.slice(1, 2)
    sliced_values = (0...sub_array.length).map { |i| sub_array.get_value(i) }
    assert_equal([false, true], sliced_values)
  end

  # NOTE(review): the leading whitespace of the heredoc lines was lost in
  # the collapsed source; two-space indentation of the elements is assumed
  # here - confirm against the actual Arrow::Array#to_s output.
  def test_to_s
    expected = <<-CONTENT.chomp
[
  true,
  false,
  true
]
    CONTENT
    assert_equal(expected, build_boolean_array([true, false, true]).to_s)
  end

  sub_test_case("#view") do
    # Viewing the int32 array as float is expected to give 0.0, 1.5, -2.5
    # and to keep the null slot.
    def test_valid
      expected = build_float_array([0.0, 1.5, -2.5, nil])
      int32_array = build_int32_array([0, 1069547520, -1071644672, nil])
      assert_equal(expected, int32_array.view(Arrow::FloatDataType.new))
    end

    # Only the first line of the error message is compared.
    def test_invalid
      expected_message = "[array][view]: Invalid: " \
                         "Can't view array of type int16 as int8: incompatible layouts"
      raised = assert_raise(Arrow::Error::Invalid) do
        build_int16_array([0, -1, 3]).view(Arrow::Int8DataType.new)
      end
      assert_equal(expected_message, raised.message.lines.first.chomp)
    end
  end

  sub_test_case("#diff_unified") do
    # Identical arrays produce no diff at all (nil).
    def test_no_diff
      array = build_string_array(["Start", "Shutdown", "Reboot"])
      other_array = build_string_array(["Start", "Shutdown", "Reboot"])
      assert_nil(array.diff_unified(other_array))
    end

    # "Shutdonw" is misspelled on purpose so that exactly one element
    # differs between the two arrays.
    def test_diff
      array = build_string_array(["Start", "Shutdown", "Reboot"])
      other_array = build_string_array(["Start", "Shutdonw", "Reboot"])
      expected = <<-STRING.chomp

@@ -1, +1 @@
-"Shutdown"
+"Shutdonw"

      STRING
      assert_equal(expected, array.diff_unified(other_array))
    end

    def test_different_type
      array = build_string_array(["Start", "Shutdown", "Reboot"])
      other_array = build_int8_array([2, 3, 6, 10])
      expected = "# Array types differed: string vs int8\n"
      assert_equal(expected, array.diff_unified(other_array))
    end
  end

  sub_test_case("#concatenate") do
    # Concatenating with an empty list gives an equal array.
    def test_no_other_arrays
      expected = build_int32_array([1, 2, 3])
      assert_equal(expected, build_int32_array([1, 2, 3]).concatenate([]))
    end

    def test_multiple_other_arrays
      head = build_int32_array([1, 2, 3])
      middle = build_int32_array([4])
      tail = build_int32_array([5, 6])
      expected = build_int32_array([1, 2, 3, 4, 5, 6])
      assert_equal(expected, head.concatenate([middle, tail]))
    end

    # Mixing int32 with uint32 must raise Arrow::Error::Invalid with
    # exactly this message.
    def test_mixed_type
      int32_array = build_int32_array([1, 2, 3])
      uint32_array = build_uint32_array([4])
      message =
        "[array][concatenate]: Invalid: " \
        "arrays to be concatenated must be identically typed, " \
        "but int32 and uint32 were encountered."
      assert_raise(Arrow::Error::Invalid.new(message)) do
        int32_array.concatenate([uint32_array])
      end
    end
  end

  private

  # Creates a fresh Arrow::BooleanArrayBuilder, lets the optional block
  # append to it, and returns the finished array.
  def finish_boolean_builder
    builder = Arrow::BooleanArrayBuilder.new
    yield(builder) if block_given?
    builder.finish
  end
end

# diff --git a/src/arrow/c_glib/test/test-binary-array.rb b/src/arrow/c_glib/test/test-binary-array.rb
# new file mode 100644
# index 000000000..0dcaf4eef
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-binary-array.rb
# @@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BinaryArray: construction from raw buffers and access
# to values, the data buffer and the offsets buffer.
class TestBinaryArray < Test::Unit::TestCase
  include Helper::Buildable

  # Offsets [0, 2, 5, 5] cut the five data bytes into a 2-byte value, a
  # 3-byte value and an empty third slot; the bitmap 0b011 leaves the
  # third slot's bit unset, matching the nil in the expected array.
  def test_new
    value_offsets = Arrow::Buffer.new([0, 2, 5, 5].pack("l*"))
    data = Arrow::Buffer.new("\x00\x01\x02\x03\x04")
    expected = build_binary_array(["\x00\x01", "\x02\x03\x04", nil])
    null_bitmap = Arrow::Buffer.new([0b011].pack("C*"))
    # NOTE(review): -1 presumably asks Arrow to work out the null count
    # itself - confirm against the BinaryArray constructor documentation.
    actual = Arrow::BinaryArray.new(3, value_offsets, data, null_bitmap, -1)
    assert_equal(expected, actual)
  end

  def test_value
    data = "\x00\x01\x02"
    array = binary_array_of(data)
    assert_equal(data, array.get_value(0).to_s)
  end

  # The data buffer holds the appended values back to back.
  def test_buffer
    data1 = "\x00\x01\x02"
    data2 = "\x03\x04\x05"
    array = binary_array_of(data1, data2)
    assert_equal(data1 + data2, array.buffer.data.to_s)
  end

  # Offsets are compared as packed "l" integers, 4 bytes each; only the
  # first three offsets of the buffer are looked at.
  def test_offsets_buffer
    data1 = "\x00\x01"
    data2 = "\x02\x03\x04"
    array = binary_array_of(data1, data2)
    byte_per_offset = 4
    leading_offsets = array.offsets_buffer.data.to_s[0, byte_per_offset * 3]
    assert_equal([0, 2, 5].pack("l*"), leading_offsets)
  end

  private

  # Appends every given value, in order, to a new
  # Arrow::BinaryArrayBuilder and returns the finished array.
  def binary_array_of(*values)
    builder = Arrow::BinaryArrayBuilder.new
    values.each { |value| builder.append_value(value) }
    builder.finish
  end
end

# diff --git a/src/arrow/c_glib/test/test-binary-data-type.rb b/src/arrow/c_glib/test/test-binary-data-type.rb
# new file mode 100644
# index 000000000..90fec1d74
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-binary-data-type.rb
# @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BinaryDataType: type id, name and string form.
class TestBinaryDataType < Test::Unit::TestCase
  def test_type
    assert_equal(Arrow::Type::BINARY, Arrow::BinaryDataType.new.id)
  end

  def test_name
    assert_equal("binary", Arrow::BinaryDataType.new.name)
  end

  def test_to_s
    assert_equal("binary", Arrow::BinaryDataType.new.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-binary-scalar.rb b/src/arrow/c_glib/test/test-binary-scalar.rb
# new file mode 100644
# index 000000000..4efc50da0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-binary-scalar.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::BinaryScalar wrapping a three-byte Arrow::Buffer.
class TestBinaryScalar < Test::Unit::TestCase
  # Every test shares one buffer and the scalar built from it.
  def setup
    @buffer = Arrow::Buffer.new("\x03\x01\x02")
    @scalar = Arrow::BinaryScalar.new(@buffer)
  end

  def test_data_type
    expected_type = Arrow::BinaryDataType.new
    assert_equal(expected_type, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar over the same buffer compares equal.
  def test_equal
    twin = Arrow::BinaryScalar.new(@buffer)
    assert_equal(twin, @scalar)
  end

  def test_to_s
    rendered = @scalar.to_s
    assert_equal("\x03\x01\x02", rendered)
  end

  def test_value
    wrapped = @scalar.value
    assert_equal(@buffer, wrapped)
  end
end

# diff --git a/src/arrow/c_glib/test/test-boolean-array.rb b/src/arrow/c_glib/test/test-boolean-array.rb
# new file mode 100644
# index 000000000..3605d613d
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-boolean-array.rb
# @@ -0,0 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::BooleanArray: raw-buffer construction, export/import
# round trip, value accessors and the element-wise logical operations.
class TestBooleanArray < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  # Builds the array from a data bitmap and a second bitmap buffer, then
  # compares it with the helper-built [true, false, nil].
  def test_new
    data_bits = Arrow::Buffer.new([0b001].pack("C*"))
    bitmap = Arrow::Buffer.new([0b011].pack("C*"))
    expected = build_boolean_array([true, false, nil])
    actual = Arrow::BooleanArray.new(3, data_bits, bitmap, -1)
    assert_equal(expected, actual)
  end

  # Exports the array, imports it back and expects an equal array.
  def test_export
    # Helper comes from Helper::Omittable (not visible in this chunk);
    # presumably omits the test on older GI bindings -- TODO confirm.
    require_gi_bindings(3, 4, 8)
    array = build_boolean_array([true, false, nil])
    exported, c_abi_array, c_abi_schema = array.export
    imported_type = Arrow::DataType.import(c_abi_schema)
    imported_array = Arrow::Array.import(c_abi_array, imported_type)
    assert_equal([exported, array],
                 [true, imported_array])
  end

  # true, false, true is expected to pack into the single byte 0b101.
  def test_buffer
    builder = Arrow::BooleanArrayBuilder.new
    [true, false, true].each { |flag| builder.append_value(flag) }
    array = builder.finish
    assert_equal([0b101].pack("C*"), array.buffer.data.to_s)
  end

  def test_value
    builder = Arrow::BooleanArrayBuilder.new
    builder.append_value(true)
    array = builder.finish
    assert_equal(true, array.get_value(0))
  end

  def test_values
    require_gi_bindings(3, 3, 1)
    builder = Arrow::BooleanArrayBuilder.new
    [true, false, true].each { |flag| builder.append_value(flag) }
    array = builder.finish
    assert_equal([true, false, true], array.values)
  end

  # nil entries stay nil under inversion.
  def test_invert
    source = build_boolean_array([false, nil, true])
    expected = build_boolean_array([true, nil, false])
    assert_equal(expected, source.invert)
  end

  def test_and
    lhs = build_boolean_array([true, false, nil, true])
    rhs = build_boolean_array([true, nil, true, false])
    expected = build_boolean_array([true, nil, nil, false])
    assert_equal(expected, lhs.and(rhs))
  end

  def test_or
    lhs = build_boolean_array([true, false, nil, false])
    rhs = build_boolean_array([false, nil, true, false])
    expected = build_boolean_array([true, nil, nil, false])
    assert_equal(expected, lhs.or(rhs))
  end

  def test_xor
    lhs = build_boolean_array([true, false, nil, true])
    rhs = build_boolean_array([false, nil, true, true])
    expected = build_boolean_array([true, nil, nil, false])
    assert_equal(expected, lhs.xor(rhs))
  end
end

# diff --git a/src/arrow/c_glib/test/test-boolean-data-type.rb b/src/arrow/c_glib/test/test-boolean-data-type.rb
# new file mode 100644
# index 000000000..e9a18bee0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-boolean-data-type.rb
# @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::BooleanDataType: export/import round trip plus the
# id, name and string form.
class TestBooleanDataType < Test::Unit::TestCase
  include Helper::Omittable

  # The exported schema imports back as an equal data type.
  def test_export
    # Helper comes from Helper::Omittable (not visible in this chunk);
    # presumably omits the test on older GI bindings -- TODO confirm.
    require_gi_bindings(3, 4, 8)
    boolean_type = Arrow::BooleanDataType.new
    c_abi_schema = boolean_type.export
    imported_type = Arrow::DataType.import(c_abi_schema)
    assert_equal(boolean_type, imported_type)
  end

  def test_type
    boolean_type = Arrow::BooleanDataType.new
    assert_equal(Arrow::Type::BOOLEAN, boolean_type.id)
  end

  def test_name
    boolean_type = Arrow::BooleanDataType.new
    assert_equal("bool", boolean_type.name)
  end

  def test_to_s
    boolean_type = Arrow::BooleanDataType.new
    assert_equal("bool", boolean_type.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-boolean-scalar.rb b/src/arrow/c_glib/test/test-boolean-scalar.rb
# new file mode 100644
# index 000000000..f8913d6a7
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-boolean-scalar.rb
# @@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BooleanScalar holding the value true.
class TestBooleanScalar < Test::Unit::TestCase
  def setup
    @scalar = Arrow::BooleanScalar.new(true)
  end

  # Parsing the text "true" as a boolean yields an equal scalar.
  def test_parse
    parsed = Arrow::Scalar.parse(Arrow::BooleanDataType.new, "true")
    assert_equal(@scalar, parsed)
  end

  def test_data_type
    expected_type = Arrow::BooleanDataType.new
    assert_equal(expected_type, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  def test_equal
    twin = Arrow::BooleanScalar.new(true)
    assert_equal(twin, @scalar)
  end

  def test_to_s
    rendered = @scalar.to_s
    assert_equal("true", rendered)
  end

  def test_value
    unwrapped = @scalar.value
    assert_equal(true, unwrapped)
  end
end

# diff --git a/src/arrow/c_glib/test/test-buffer-input-stream.rb b/src/arrow/c_glib/test/test-buffer-input-stream.rb
# new file mode 100644
# index 000000000..e31ea38db
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-buffer-input-stream.rb
# @@ -0,0 +1,111 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::BufferInputStream: sequential and positional reads,
# cursor movement, peeking, GIO wrapping and record batch reading.
class TestBufferInputStream < Test::Unit::TestCase
  include Helper::Buildable

  def test_read
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    chunk = stream.read(5)
    assert_equal("Hello", chunk.data.to_s)
  end

  def test_read_bytes
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    bytes = stream.read_bytes(5)
    assert_equal("Hello", bytes.to_s)
  end

  # Reads 3 bytes starting at offset 6.
  def test_read_at
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    chunk = stream.read_at(6, 3)
    assert_equal("Wor", chunk.data.to_s)
  end

  def test_read_at_bytes
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    bytes = stream.read_at_bytes(6, 3)
    assert_equal("Wor", bytes.to_s)
  end

  # Skipping 6 bytes leaves "World" as the next 5.
  def test_advance
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    stream.advance(6)
    chunk = stream.read(5)
    assert_equal("World", chunk.data.to_s)
  end

  # After advance(3) and align(8), the next read is expected to return
  # "rld", i.e. the bytes from offset 8 of "Hello World".
  def test_align
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    stream.advance(3)
    stream.align(8)
    chunk = stream.read(3)
    assert_equal("rld", chunk.data.to_s)
  end

  # The peek happens first; the following read must return the same bytes.
  def test_peek
    source = Arrow::Buffer.new("Hello World")
    stream = Arrow::BufferInputStream.new(source)
    peeked = stream.peek(5)
    consumed = stream.read(5)
    assert_equal(consumed.data.to_s,
                 peeked.to_s)
  end

  # Wraps the stream in a GIO charset converter and reads the converted
  # bytes back through Arrow::GIOInputStream.
  def test_gio_input_stream
    # U+3042 HIRAGANA LETTER A
    text = "\u3042"
    target_encoding = "cp932"
    source = Arrow::Buffer.new(text)
    stream = Arrow::BufferInputStream.new(source)
    converter = Gio::CharsetConverter.new(target_encoding, "UTF-8")
    converting_stream = Gio::ConverterInputStream.new(stream, converter)
    gio_stream = Arrow::GIOInputStream.new(converting_stream)
    raw_bytes = gio_stream.read(10).data.to_s
    assert_equal(text.encode(target_encoding),
                 raw_bytes.dup.force_encoding(target_encoding))
  end

  # Writes a one-row record batch to a buffer and reads it back.
  def test_read_record_batch
    boolean_fields = [
      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
      Arrow::Field.new("valid", Arrow::BooleanDataType.new),
    ]
    schema = Arrow::Schema.new(boolean_fields)
    column_arrays = [
      build_boolean_array([true]),
      build_boolean_array([false]),
    ]
    record_batch = Arrow::RecordBatch.new(schema, 1, column_arrays)

    storage = Arrow::ResizableBuffer.new(0)
    writer = Arrow::BufferOutputStream.new(storage)
    writer.write_record_batch(record_batch)
    writer.close

    reader = Arrow::BufferInputStream.new(storage)
    read_options = Arrow::ReadOptions.new
    restored = reader.read_record_batch(schema, read_options)
    assert_equal(record_batch, restored)
  end
end

# diff --git a/src/arrow/c_glib/test/test-buffer-output-stream.rb b/src/arrow/c_glib/test/test-buffer-output-stream.rb
# new file mode 100644
# index 000000000..9866762ff
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-buffer-output-stream.rb
# @@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BufferOutputStream writing into a resizable buffer.
class TestBufferOutputStream < Test::Unit::TestCase
  include Helper::Buildable

  # Written bytes are visible in the backing buffer after close.
  def test_new
    storage = Arrow::ResizableBuffer.new(0)
    stream = Arrow::BufferOutputStream.new(storage)
    stream.write("Hello")
    stream.close
    assert_equal("Hello", storage.data.to_s)
  end

  # align(8) after a 5-byte write is expected to add three NUL bytes.
  def test_align
    storage = Arrow::ResizableBuffer.new(0)
    stream = Arrow::BufferOutputStream.new(storage)
    stream.write("Hello")
    stream.align(8)
    stream.close
    assert_equal("Hello\x00\x00\x00", storage.data.to_s)
  end

  # Writes a one-row record batch with explicit write options and reads
  # it back from the same buffer.
  def test_write_record_batch
    boolean_fields = [
      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
      Arrow::Field.new("valid", Arrow::BooleanDataType.new),
    ]
    schema = Arrow::Schema.new(boolean_fields)
    column_arrays = [
      build_boolean_array([true]),
      build_boolean_array([false]),
    ]
    record_batch = Arrow::RecordBatch.new(schema, 1, column_arrays)

    storage = Arrow::ResizableBuffer.new(0)
    write_options = Arrow::WriteOptions.new
    writer = Arrow::BufferOutputStream.new(storage)
    writer.write_record_batch(record_batch, write_options)
    writer.close

    reader = Arrow::BufferInputStream.new(storage)
    restored = reader.read_record_batch(schema)
    assert_equal(record_batch, restored)
  end
end

# diff --git a/src/arrow/c_glib/test/test-buffer.rb b/src/arrow/c_glib/test/test-buffer.rb
# new file mode 100644
# index 000000000..0617eac04
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-buffer.rb
# @@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::Buffer built from a String or from GLib::Bytes.
class TestBuffer < Test::Unit::TestCase
  include Helper::Omittable

  def setup
    @data = "Hello"
    @buffer = Arrow::Buffer.new(@data)
  end

  # A buffer built from GLib::Bytes is expected to expose the same
  # pointer and contents as the bytes object.
  def test_new_bytes
    bytes = GLib::Bytes.new(@data)
    buffer = Arrow::Buffer.new(bytes)
    expected = [bytes.pointer, @data]
    actual = [buffer.data.pointer, buffer.data.to_s]
    assert_equal(expected, actual)
  end

  # A slice at offset 1 is expected to point one byte past the original.
  def test_new_bytes_slice
    bytes = GLib::Bytes.new(@data)
    buffer = Arrow::Buffer.new(bytes)
    slice = buffer.slice(1, 3)
    expected = [bytes.pointer + 1, @data[1, 3]]
    actual = [slice.data.pointer, slice.data.to_s]
    assert_equal(expected, actual)
  end

  # Equality is checked against a buffer over a duplicate of the string.
  def test_equal
    duplicate = Arrow::Buffer.new(@data.dup)
    assert_equal(@buffer, duplicate)
  end

  # Only the first 5 bytes ("Hello") are compared.
  def test_equal_n_bytes
    shorter = Arrow::Buffer.new("Hello!")
    longer = Arrow::Buffer.new("Hello World!")
    assert { shorter.equal_n_bytes(longer, 5) }
  end

  def test_mutable?
    assert { !@buffer.mutable? }
  end

  def test_capacity
    assert_equal(@data.bytesize, @buffer.capacity)
  end

  def test_data
    assert_equal(@data, @buffer.data.to_s)
  end

  def test_mutable_data
    # Helper comes from Helper::Omittable (not visible in this chunk);
    # presumably omits the test on older GI bindings -- TODO confirm.
    require_gi_bindings(3, 1, 2)
    assert_nil(@buffer.mutable_data)
  end

  def test_size
    assert_equal(@data.bytesize, @buffer.size)
  end

  def test_parent
    assert_nil(@buffer.parent)
  end

  def test_copy
    copy = @buffer.copy(1, 3)
    assert_equal(@data[1, 3], copy.data.to_s)
  end

  def test_slice
    slice = @buffer.slice(1, 3)
    assert_equal(@data[1, 3], slice.data.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-call-expression.rb b/src/arrow/c_glib/test/test-call-expression.rb
# new file mode 100644
# index 000000000..a8ba2f4cb
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-call-expression.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::CallExpression: equality and string rendering.
class TestCallExpression < Test::Unit::TestCase
  # Shared fixture: the call add(augend, addend).
  def setup
    @arguments = [
      Arrow::FieldExpression.new("augend"),
      Arrow::FieldExpression.new("addend"),
    ]
    @expression = Arrow::CallExpression.new("add", @arguments)
  end

  sub_test_case("==") do
    # Same function name and no arguments on both sides.
    def test_true
      lhs = Arrow::CallExpression.new("now", [])
      rhs = Arrow::CallExpression.new("now", [])
      assert_equal(lhs, rhs)
    end

    # Different function names.
    def test_false
      lhs = Arrow::CallExpression.new("a", [])
      rhs = Arrow::CallExpression.new("b", [])
      assert_not_equal(lhs, rhs)
    end
  end

  def test_to_string
    rendered = @expression.to_s
    assert_equal("add(augend, addend)", rendered)
  end
end

# diff --git a/src/arrow/c_glib/test/test-cast.rb b/src/arrow/c_glib/test/test-cast.rb
# new file mode 100644
# index 000000000..528a0e8c1
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-cast.rb
# @@ -0,0 +1,145 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for array casting. Each sub test case covers one
# Arrow::CastOptions flag: the default cast is expected to raise
# Arrow::Error::Invalid, and the cast with the flag set is expected to
# produce the converted array.
class TestCast < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  # int8 to int32 needs no options.
  def test_safe
    values = [-1, 2, nil]
    widened = build_int8_array(values).cast(Arrow::Int32DataType.new)
    assert_equal(build_int32_array(values), widened)
  end

  sub_test_case("allow-int-overflow") do
    def test_default
      assert_raise(Arrow::Error::Invalid) do
        source = build_int32_array([128])
        source.cast(Arrow::Int8DataType.new)
      end
    end

    # With overflow allowed, 128 is expected to become -128 as int8.
    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_int_overflow = true
      expected = build_int8_array([-128])
      narrowed = build_int32_array([128]).cast(Arrow::Int8DataType.new,
                                               cast_options)
      assert_equal(expected, narrowed)
    end
  end

  sub_test_case("allow-time-truncate") do
    def test_default
      epoch_milli = 1504953190854 # 2017-09-09T10:33:10.854Z
      second_type = Arrow::TimestampDataType.new(:second)
      milli_array = build_timestamp_array(:milli, [epoch_milli])
      assert_raise(Arrow::Error::Invalid) do
        milli_array.cast(second_type)
      end
    end

    # The expected seconds value is the millisecond value divided by 1000.
    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_time_truncate = true
      epoch_milli = 1504953190854 # 2017-09-09T10:33:10.854Z
      expected = build_timestamp_array(:second, [epoch_milli / 1000])
      milli_array = build_timestamp_array(:milli, [epoch_milli])
      second_type = Arrow::TimestampDataType.new(:second)
      assert_equal(expected,
                   milli_array.cast(second_type, cast_options))
    end
  end

  sub_test_case("allow-time-overflow") do
    def test_default
      epoch_second = 95617584000 # 5000-01-01T00:00:00Z
      nano_type = Arrow::TimestampDataType.new(:nano)
      second_array = build_timestamp_array(:second, [epoch_second])
      assert_raise(Arrow::Error::Invalid) do
        second_array.cast(nano_type)
      end
    end

    # The expected nanosecond value is the exact product reduced
    # modulo 2 ** 64.
    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_time_overflow = true
      epoch_second = 95617584000 # 5000-01-01T00:00:00Z
      second_array = build_timestamp_array(:second, [epoch_second])
      wrapped_nano = (epoch_second * 1000 * 1000 * 1000) % (2 ** 64)
      expected = build_timestamp_array(:nano, [wrapped_nano])
      nano_type = Arrow::TimestampDataType.new(:nano)
      assert_equal(expected,
                   second_array.cast(nano_type, cast_options))
    end
  end

  sub_test_case("allow-decimal-truncate") do
    def test_default
      decimal_type = Arrow::Decimal128DataType.new(8, 2)
      decimal_array = build_decimal128_array(decimal_type, ["23423445"])
      assert_raise(Arrow::Error::Invalid) do
        decimal_array.cast(Arrow::Int64DataType.new)
      end
    end

    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_decimal_truncate = true
      decimal_type = Arrow::Decimal128DataType.new(8, 2)
      decimal_array = build_decimal128_array(decimal_type, ["23423445"])
      expected = build_int64_array([234234])
      assert_equal(expected,
                   decimal_array.cast(Arrow::Int64DataType.new, cast_options))
    end
  end

  sub_test_case("allow-float-truncate") do
    def test_default
      assert_raise(Arrow::Error::Invalid) do
        source = build_float_array([1.1])
        source.cast(Arrow::Int8DataType.new)
      end
    end

    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_float_truncate = true
      int8_type = Arrow::Int8DataType.new
      expected = build_int8_array([1])
      assert_equal(expected,
                   build_float_array([1.1]).cast(int8_type, cast_options))
    end
  end

  sub_test_case("allow-invalid-utf8") do
    def test_default
      assert_raise(Arrow::Error::Invalid) do
        source = build_binary_array(["\xff"])
        source.cast(Arrow::StringDataType.new)
      end
    end

    def test_true
      cast_options = Arrow::CastOptions.new
      cast_options.allow_invalid_utf8 = true
      string_type = Arrow::StringDataType.new
      expected = build_string_array(["\xff"])
      assert_equal(expected,
                   build_binary_array(["\xff"]).cast(string_type, cast_options))
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-chunked-array-datum.rb
# b/src/arrow/c_glib/test/test-chunked-array-datum.rb
# new file mode 100644
# index 000000000..763173153
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-chunked-array-datum.rb
# @@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ChunkedArrayDatum wrapping a one-chunk boolean array.
class TestChunkedArrayDatum < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @array = build_boolean_array([true, false])
    @chunked_array = Arrow::ChunkedArray.new([@array])
    @datum = Arrow::ChunkedArrayDatum.new(@chunked_array)
  end

  # A chunked array datum does not report itself as a plain array...
  def test_array?
    assert { !@datum.array? }
  end

  # ...but it does report itself as array-like.
  def test_array_like?
    assert { @datum.array_like? }
  end

  sub_test_case("==") do
    # Two datums over the same chunked array are equal.
    def test_true
      lhs = Arrow::ChunkedArrayDatum.new(@chunked_array)
      rhs = Arrow::ChunkedArrayDatum.new(@chunked_array)
      assert_equal(lhs, rhs)
    end

    # A chunked array datum differs from an array datum.
    def test_false
      array_datum = Arrow::ArrayDatum.new(@array)
      assert_not_equal(@datum, array_datum)
    end
  end

  def test_to_string
    rendered = @datum.to_s
    assert_equal("ChunkedArray", rendered)
  end

  def test_value
    wrapped = @datum.value
    assert_equal(@chunked_array, wrapped)
  end
end

# diff --git a/src/arrow/c_glib/test/test-chunked-array.rb b/src/arrow/c_glib/test/test-chunked-array.rb
# new file mode 100644
# index 000000000..8f912ac84
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-chunked-array.rb
# @@ -0,0 +1,141 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::ChunkedArray: equality across chunk layouts, type and
# size accessors, chunk access, slicing, pretty printing and combining.
class TestChunkedArray < Test::Unit::TestCase
  include Helper::Buildable

  # The same three values split differently are expected to be equal.
  def test_equal
    two_then_one = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    one_then_two = [
      build_boolean_array([true]),
      build_boolean_array([false, true]),
    ]
    assert_equal(Arrow::ChunkedArray.new(two_then_one),
                 Arrow::ChunkedArray.new(one_then_two))
  end

  def test_value_data_type
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(Arrow::BooleanDataType.new,
                 chunked_array.value_data_type)
  end

  def test_value_type
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(Arrow::Type::BOOLEAN,
                 chunked_array.value_type)
  end

  # Row count is summed over all chunks.
  def test_n_rows
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(3, chunked_array.n_rows)
  end

  # One nil in the first chunk plus two in the second.
  def test_n_nulls
    arrays = [
      build_boolean_array([true, nil, false]),
      build_boolean_array([nil, nil, true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(3, chunked_array.n_nulls)
  end

  def test_n_chunks
    arrays = [
      build_boolean_array([true]),
      build_boolean_array([false]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(2, chunked_array.n_chunks)
  end

  def test_chunk
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([false]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    first_chunk = chunked_array.get_chunk(0)
    assert_equal(2, first_chunk.length)
  end

  def test_chunks
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([false]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    chunk_lengths = chunked_array.chunks.map(&:length)
    assert_equal([2, 1], chunk_lengths)
  end

  # slice(1, 3) crosses the chunk boundary, so the result keeps two chunks.
  def test_slice
    source_arrays = [
      build_boolean_array([true, false, true]),
      build_boolean_array([false, true]),
    ]
    expected_arrays = [
      build_boolean_array([false, true]),
      build_boolean_array([false]),
    ]
    chunked_array = Arrow::ChunkedArray.new(source_arrays)
    sliced = chunked_array.slice(1, 3)
    assert_equal(expected_arrays, sliced.chunks)
  end

  # NOTE(review): the heredoc's leading whitespace was collapsed in the
  # text this was recovered from; two spaces per nesting level is assumed
  # here -- confirm against the upstream file.
  def test_to_s
    arrays = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal(<<-PRETTY_PRINT.chomp, chunked_array.to_s)
[
  [
    true,
    false
  ],
  [
    true
  ]
]
    PRETTY_PRINT
  end

  # Combining is expected to flatten all chunks into one array.
  def test_combine
    arrays = [
      build_boolean_array([true]),
      build_boolean_array([false, nil]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    expected = build_boolean_array([true, false, nil])
    assert_equal(expected, chunked_array.combine)
  end
end

# diff --git a/src/arrow/c_glib/test/test-codec.rb b/src/arrow/c_glib/test/test-codec.rb
# new file mode 100644
# index 000000000..a32ec4dc7
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-codec.rb
# @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests the name, compression type and compression level reported by a
# gzip Arrow::Codec.
class TestCodec < Test::Unit::TestCase
  def test_name
    gzip = Arrow::Codec.new(:gzip)
    assert_equal("gzip", gzip.name)
  end

  def test_compression_type
    gzip = Arrow::Codec.new(:gzip)
    assert_equal(Arrow::CompressionType::GZIP, gzip.compression_type)
  end

  # A codec created without an explicit level is expected to report 9.
  def test_compression_level
    gzip = Arrow::Codec.new(:gzip)
    assert_equal(9, gzip.compression_level)
  end
end

# diff --git a/src/arrow/c_glib/test/test-compressed-input-stream.rb b/src/arrow/c_glib/test/test-compressed-input-stream.rb
# new file mode 100644
# index 000000000..71f230a50
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-compressed-input-stream.rb
# @@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::CompressedInputStream layered over a buffer stream.
class TestCompressedInputStream < Test::Unit::TestCase
  include Helper::Buildable

  # Gzips "Hello" with Ruby's Zlib, then reads it back through the
  # compressed input stream.
  def test_read
    plain_text = "Hello"

    gzipped = StringIO.new
    Zlib::GzipWriter.wrap(gzipped) do |gz|
      gz.write(plain_text)
    end

    codec = Arrow::Codec.new(:gzip)
    buffer = Arrow::Buffer.new(gzipped.string)
    raw_input = Arrow::BufferInputStream.new(buffer)
    input = Arrow::CompressedInputStream.new(codec, raw_input)
    decompressed = input.read(plain_text.bytesize).data.to_s
    assert_equal(plain_text, decompressed)
    input.close
    raw_input.close
  end

  # The wrapped stream is available through #raw.
  def test_raw
    buffer = Arrow::Buffer.new("Hello")
    raw_input = Arrow::BufferInputStream.new(buffer)
    codec = Arrow::Codec.new(:gzip)
    input = Arrow::CompressedInputStream.new(codec, raw_input)
    assert_equal(raw_input, input.raw)
  end
end

# diff --git a/src/arrow/c_glib/test/test-compressed-output-stream.rb b/src/arrow/c_glib/test/test-compressed-output-stream.rb
# new file mode 100644
# index 000000000..eb54a45d3
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-compressed-output-stream.rb
# @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TestCompressedOutputStream < Test::Unit::TestCase + include Helper::Buildable + + def test_write + data = "Hello" + buffer = Arrow::ResizableBuffer.new(8) + raw_output = Arrow::BufferOutputStream.new(buffer) + codec = Arrow::Codec.new(:gzip) + output = Arrow::CompressedOutputStream.new(codec, raw_output) + output.write(data) + output.close + + input = StringIO.new(buffer.data.to_s) + Zlib::GzipReader.wrap(input) do |gz| + assert_equal(data, gz.read) + end + end + + def test_raw + buffer = Arrow::ResizableBuffer.new(8) + raw_output = Arrow::BufferOutputStream.new(buffer) + codec = Arrow::Codec.new(:gzip) + output = Arrow::CompressedOutputStream.new(codec, raw_output) + assert_equal(raw_output, output.raw) + end +end diff --git a/src/arrow/c_glib/test/test-count-values.rb b/src/arrow/c_glib/test/test-count-values.rb new file mode 100644 index 000000000..46c36cf47 --- /dev/null +++ b/src/arrow/c_glib/test/test-count-values.rb @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestCountValues < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_int32 + array = build_int32_array([1, 3, 1, -1, -3, -1]) + fields = [ + Arrow::Field.new("values", Arrow::Int32DataType.new), + Arrow::Field.new("counts", Arrow::Int64DataType.new), + ] + structs = [ + {"values" => 1, "counts" => 2}, + {"values" => 3, "counts" => 1}, + {"values" => -1, "counts" => 2}, + {"values" => -3, "counts" => 1}, + ] + assert_equal(build_struct_array(fields, structs), + array.count_values) + end + + def test_string + array = build_string_array(["Ruby", "Python", "Ruby"]) + fields = [ + Arrow::Field.new("values", Arrow::StringDataType.new), + Arrow::Field.new("counts", Arrow::Int64DataType.new), + ] + structs = [ + {"values" => "Ruby", "counts" => 2}, + {"values" => "Python", "counts" => 1}, + ] + assert_equal(build_struct_array(fields, structs), + array.count_values) + end +end diff --git a/src/arrow/c_glib/test/test-count.rb b/src/arrow/c_glib/test/test-count.rb new file mode 100644 index 000000000..6e9421914 --- /dev/null +++ b/src/arrow/c_glib/test/test-count.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestCount < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + sub_test_case("mode") do + def test_default + assert_equal(2, build_int32_array([1, nil, 3]).count) + + options = Arrow::CountOptions.new + options.mode = Arrow::CountMode::ONLY_VALID + assert_equal(2, build_int32_array([1, nil, 3]).count(options)) + end + + def test_nulls + options = Arrow::CountOptions.new + options.mode = Arrow::CountMode::ONLY_NULL + assert_equal(1, build_int32_array([1, nil, 3]).count(options)) + end + + def test_all + options = Arrow::CountOptions.new + options.mode = Arrow::CountMode::ALL + assert_equal(3, build_int32_array([1, nil, 3]).count(options)) + end + end +end diff --git a/src/arrow/c_glib/test/test-csv-reader.rb b/src/arrow/c_glib/test/test-csv-reader.rb new file mode 100644 index 000000000..018f062ac --- /dev/null +++ b/src/arrow/c_glib/test/test-csv-reader.rb @@ -0,0 +1,241 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestCSVReader < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + sub_test_case("#read") do + def open_input(csv) + buffer = Arrow::Buffer.new(csv) + Arrow::BufferInputStream.new(buffer) + end + + def test_default + table = Arrow::CSVReader.new(open_input(<<-CSV)) +message,count +"Start",2 +"Shutdown",9 + CSV + columns = { + "message" => build_string_array(["Start", "Shutdown"]), + "count" => build_int64_array([2, 9]), + } + assert_equal(build_table(columns), + table.read) + end + + sub_test_case("options") do + def test_add_column_type + options = Arrow::CSVReadOptions.new + options.add_column_type("count", Arrow::UInt8DataType.new) + options.add_column_type("valid", Arrow::BooleanDataType.new) + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +count,valid +2,1 +9,0 + CSV + columns = { + "count" => build_uint8_array([2, 9]), + "valid" => build_boolean_array([true, false]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_add_schema + options = Arrow::CSVReadOptions.new + fields = [ + Arrow::Field.new("count", Arrow::UInt8DataType.new), + Arrow::Field.new("valid", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + options.add_schema(schema) + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +count,valid +2,1 +9,0 + CSV + columns = { + "count" => build_uint8_array([2, 9]), + "valid" => build_boolean_array([true, false]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_column_types + require_gi_bindings(3, 3, 1) + options = Arrow::CSVReadOptions.new + options.add_column_type("count", Arrow::UInt8DataType.new) + options.add_column_type("valid", Arrow::BooleanDataType.new) + assert_equal({ + "count" => Arrow::UInt8DataType.new, + "valid" => Arrow::BooleanDataType.new, + }, + options.column_types) + end + + def test_null_values + options = Arrow::CSVReadOptions.new + null_values = ["2", "5"] + options.null_values = null_values + 
assert_equal(null_values, options.null_values) + + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +message,count +"Start",2 +"Shutdown",9 +"Restart",5 + CSV + columns = { + "message" => build_string_array(["Start", "Shutdown", "Restart"]), + "count" => build_int64_array([nil, 9, nil]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_add_null_value + options = Arrow::CSVReadOptions.new + null_values = ["2", "5"] + options.null_values = null_values + options.add_null_value("9") + assert_equal(null_values + ["9"], options.null_values) + end + + def test_boolean_values + options = Arrow::CSVReadOptions.new + true_values = ["Start", "Restart"] + options.true_values = true_values + assert_equal(true_values, options.true_values) + + false_values = ["Shutdown"] + options.false_values = false_values + assert_equal(false_values, options.false_values) + + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +message,count +"Start",2 +"Shutdown",9 +"Restart",5 + CSV + columns = { + "message" => build_boolean_array([true, false, true]), + "count" => build_int64_array([2, 9, 5]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_add_true_value + options = Arrow::CSVReadOptions.new + true_values = ["Start", "Restart"] + options.true_values = true_values + options.add_true_value("Shutdown") + assert_equal(true_values + ["Shutdown"], options.true_values) + end + + def test_add_false_value + options = Arrow::CSVReadOptions.new + false_values = ["Start", "Restart"] + options.false_values = false_values + options.add_false_value("Shutdown") + assert_equal(false_values + ["Shutdown"], options.false_values) + end + + def test_allow_null_strings + options = Arrow::CSVReadOptions.new + options.null_values = ["Start", "Restart"] + options.allow_null_strings = true + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +message,count +"Start",2 +"Shutdown",9 +"Restart",5 + CSV + columns = { + "message" => 
build_string_array([nil, "Shutdown", nil]), + "count" => build_int64_array([2, 9, 5]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_n_skip_rows + options = Arrow::CSVReadOptions.new + options.n_skip_rows = 1 + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +message1,message2 +"Start1","Start2" +"Shutdown1","Shutdown2" +"Reboot1","Reboot2" + CSV + columns = { + "Start1" => build_string_array(["Shutdown1", "Reboot1"]), + "Start2" => build_string_array(["Shutdown2", "Reboot2"]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_column_names + options = Arrow::CSVReadOptions.new + column_names = ["message", "count"] + options.column_names = column_names + assert_equal(column_names, options.column_names) + + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +"Start",2 +"Shutdown",9 +"Reboot",5 + CSV + columns = { + "message" => build_string_array(["Start", "Shutdown", "Reboot"]), + "count" => build_int64_array([2, 9, 5]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_add_column_name + options = Arrow::CSVReadOptions.new + column_names = ["message", "count"] + options.column_names = column_names + options.add_column_name("score") + assert_equal(column_names + ["score"], options.column_names) + end + + def test_generate_column_names + options = Arrow::CSVReadOptions.new + options.generate_column_names = true + + table = Arrow::CSVReader.new(open_input(<<-CSV), options) +"Start",2 +"Shutdown",9 +"Reboot",5 + CSV + columns = { + "f0" => build_string_array(["Start", "Shutdown", "Reboot"]), + "f1" => build_int64_array([2, 9, 5]), + } + assert_equal(build_table(columns), + table.read) + end + end + end +end diff --git a/src/arrow/c_glib/test/test-cuda.rb b/src/arrow/c_glib/test/test-cuda.rb new file mode 100644 index 000000000..c9b349293 --- /dev/null +++ b/src/arrow/c_glib/test/test-cuda.rb @@ -0,0 +1,159 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or 
more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestCUDA < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def setup + omit("Arrow CUDA is required") unless defined?(::ArrowCUDA) + @manager = ArrowCUDA::DeviceManager.new + omit("At least one GPU is required") if @manager.n_devices.zero? 
+ @context = @manager.get_context(0) + end + + sub_test_case("Context") do + def test_allocated_size + allocated_size_before = @context.allocated_size + size = 128 + buffer = ArrowCUDA::Buffer.new(@context, size) + assert_equal(size, + @context.allocated_size - allocated_size_before) + end + end + + sub_test_case("Buffer") do + def setup + super + @buffer = ArrowCUDA::Buffer.new(@context, 128) + end + + def test_copy + @buffer.copy_from_host("Hello World") + assert_equal("llo W", @buffer.copy_to_host(2, 5).to_s) + end + + def test_export + require_gi_bindings(3, 3, 9) + @buffer.copy_from_host("Hello World") + handle = @buffer.export + serialized_handle = handle.serialize.data + Tempfile.open("arrow-cuda-export") do |output| + pid = spawn(RbConfig.ruby, "-e", <<-SCRIPT) +require "gi" + +Gio = GI.load("Gio") +Arrow = GI.load("Arrow") +ArrowCUDA = GI.load("ArrowCUDA") + +manager = ArrowCUDA::DeviceManager.new +context = manager.get_context(0) +serialized_handle = #{serialized_handle.to_s.dump} +handle = ArrowCUDA::IPCMemoryHandle.new(serialized_handle) +buffer = ArrowCUDA::Buffer.new(context, handle) +File.open(#{output.path.dump}, "w") do |output| + output.print(buffer.copy_to_host(0, 6).to_s) +end + SCRIPT + Process.waitpid(pid) + assert_equal("Hello ", output.read) + end + end + + def test_context + assert_equal(@context.allocated_size, + @buffer.context.allocated_size) + end + + sub_test_case("#read_record_batch") do + def setup + super + @field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + @schema = Arrow::Schema.new([@field]) + @columns = [ + build_boolean_array([true]), + ] + @cpu_record_batch = Arrow::RecordBatch.new(@schema, 1, @columns) + + @buffer = ArrowCUDA::Buffer.new(@context, @cpu_record_batch) + end + + def test_default + gpu_record_batch = @buffer.read_record_batch(@schema) + assert_equal(@cpu_record_batch.n_rows, + gpu_record_batch.n_rows) + end + + def test_options + options = Arrow::ReadOptions.new + gpu_record_batch = 
@buffer.read_record_batch(@schema, options) + assert_equal(@cpu_record_batch.n_rows, + gpu_record_batch.n_rows) + end + end + end + + sub_test_case("HostBuffer") do + def test_new + buffer = ArrowCUDA::HostBuffer.new(0, 128) + assert_equal(128, buffer.size) + end + end + + sub_test_case("BufferInputStream") do + def test_new + buffer = ArrowCUDA::Buffer.new(@context, 128) + buffer.copy_from_host("Hello World") + stream = ArrowCUDA::BufferInputStream.new(buffer) + begin + assert_equal("Hello Worl", stream.read(5).copy_to_host(0, 10).to_s) + ensure + stream.close + end + end + end + + sub_test_case("BufferOutputStream") do + def setup + super + @buffer = ArrowCUDA::Buffer.new(@context, 128) + @buffer.copy_from_host("\x00" * @buffer.size) + @stream = ArrowCUDA::BufferOutputStream.new(@buffer) + end + + def cleanup + super + @stream.close + end + + def test_new + @stream.write("Hello World") + assert_equal("Hello World", @buffer.copy_to_host(0, 11).to_s) + end + + def test_buffer + assert_equal(0, @stream.buffer_size) + @stream.buffer_size = 5 + assert_equal(5, @stream.buffer_size) + @stream.write("Hell") + assert_equal(4, @stream.buffered_size) + assert_equal("\x00" * 5, @buffer.copy_to_host(0, 5).to_s) + @stream.write("o") + assert_equal("Hello", @buffer.copy_to_host(0, 5).to_s) + end + end +end diff --git a/src/arrow/c_glib/test/test-date32-array.rb b/src/arrow/c_glib/test/test-date32-array.rb new file mode 100644 index 000000000..09ef78650 --- /dev/null +++ b/src/arrow/c_glib/test/test-date32-array.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate32Array < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_new + after_epoch = 17406 # 2017-08-28 + raw_data = [0, after_epoch] + assert_equal(build_date32_array([*raw_data, nil]), + Arrow::Date32Array.new(3, + Arrow::Buffer.new(raw_data.pack("l*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + before_epoch = -3653 # 1960-01-01 + after_epoch = 17406 # 2017-08-28 + + builder = Arrow::Date32ArrayBuilder.new + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) + array = builder.finish + assert_equal([0, after_epoch, before_epoch].pack("l*"), + array.buffer.data.to_s) + end + + def test_value + after_epoch = 17406 # 2017-08-28 + + builder = Arrow::Date32ArrayBuilder.new + builder.append_value(after_epoch) + array = builder.finish + assert_equal(after_epoch, array.get_value(0)) + end + + def test_values + before_epoch = -3653 # 1960-01-01 + after_epoch = 17406 # 2017-08-28 + + builder = Arrow::Date32ArrayBuilder.new + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) + array = builder.finish + assert_equal([0, after_epoch, before_epoch], array.values) + end +end diff --git a/src/arrow/c_glib/test/test-date32-data-type.rb b/src/arrow/c_glib/test/test-date32-data-type.rb new file mode 100644 index 000000000..e3c80e106 --- /dev/null +++ b/src/arrow/c_glib/test/test-date32-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor 
license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate32DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Date32DataType.new + assert_equal(Arrow::Type::DATE32, data_type.id) + end + + def test_name + data_type = Arrow::Date32DataType.new + assert_equal("date32", data_type.name) + end + + def test_to_s + data_type = Arrow::Date32DataType.new + assert_equal("date32[day]", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-date32-scalar.rb b/src/arrow/c_glib/test/test-date32-scalar.rb new file mode 100644 index 000000000..ae41ebf72 --- /dev/null +++ b/src/arrow/c_glib/test/test-date32-scalar.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate32Scalar < Test::Unit::TestCase + def setup + @value = 17406 # 2017-08-28 + @scalar = Arrow::Date32Scalar.new(@value) + end + + def test_data_type + assert_equal(Arrow::Date32DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Date32Scalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("2017-08-28", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-date64-array.rb b/src/arrow/c_glib/test/test-date64-array.rb new file mode 100644 index 000000000..4d9f18919 --- /dev/null +++ b/src/arrow/c_glib/test/test-date64-array.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDate64Array < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_new + after_epoch = 1503878400000 # 2017-08-28T00:00:00Z + raw_data = [0, after_epoch] + assert_equal(build_date64_array([*raw_data, nil]), + Arrow::Date64Array.new(3, + Arrow::Buffer.new(raw_data.pack("q*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + before_epoch = -315619200 # 1960-01-01T00:00:00Z + after_epoch = 1503878400000 # 2017-08-28T00:00:00Z + + builder = Arrow::Date64ArrayBuilder.new + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) + array = builder.finish + assert_equal([0, after_epoch, before_epoch].pack("q*"), + array.buffer.data.to_s) + end + + def test_value + after_epoch = 1503878400000 # 2017-08-28T00:00:00Z + + builder = Arrow::Date64ArrayBuilder.new + builder.append_value(after_epoch) + array = builder.finish + assert_equal(after_epoch, array.get_value(0)) + end + + def test_values + before_epoch = -315619200 # 1960-01-01T00:00:00Z + after_epoch = 1503878400000 # 2017-08-28T00:00:00Z + + builder = Arrow::Date64ArrayBuilder.new + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) + array = builder.finish + assert_equal([0, after_epoch, before_epoch], array.values) + end +end diff --git a/src/arrow/c_glib/test/test-date64-data-type.rb b/src/arrow/c_glib/test/test-date64-data-type.rb new file mode 100644 index 000000000..5a5ccbac5 --- /dev/null +++ b/src/arrow/c_glib/test/test-date64-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDate64DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Date64DataType.new + assert_equal(Arrow::Type::DATE64, data_type.id) + end + + def test_name + data_type = Arrow::Date64DataType.new + assert_equal("date64", data_type.name) + end + + def test_to_s + data_type = Arrow::Date64DataType.new + assert_equal("date64[ms]", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-date64-scalar.rb b/src/arrow/c_glib/test/test-date64-scalar.rb new file mode 100644 index 000000000..ce39d3c2d --- /dev/null +++ b/src/arrow/c_glib/test/test-date64-scalar.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDate64Scalar < Test::Unit::TestCase + def setup + @value = 1503878400000 # 2017-08-28T00:00:00Z + @scalar = Arrow::Date64Scalar.new(@value) + end + + def test_data_type + assert_equal(Arrow::Date64DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Date64Scalar.new(@value), + @scalar) + end + + def test_to_s + assert_equal("2017-08-28", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-decimal128-array.rb b/src/arrow/c_glib/test/test-decimal128-array.rb new file mode 100644 index 000000000..132ceb778 --- /dev/null +++ b/src/arrow/c_glib/test/test-decimal128-array.rb @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDecimal128Array < Test::Unit::TestCase + def test_format_value + data_type = Arrow::Decimal128DataType.new(8, 2) + builder = Arrow::Decimal128ArrayBuilder.new(data_type) + decimal = Arrow::Decimal128.new("23423445") + builder.append_value(decimal) + array = builder.finish + assert_equal("234234.45", array.format_value(0)) + end + + def test_value + data_type = Arrow::Decimal128DataType.new(8, 2) + builder = Arrow::Decimal128ArrayBuilder.new(data_type) + decimal = Arrow::Decimal128.new("23423445") + builder.append_value(decimal) + array = builder.finish + assert_equal("234234.45", + array.get_value(0).to_string_scale(array.value_data_type.scale)) + end +end diff --git a/src/arrow/c_glib/test/test-decimal128-data-type.rb b/src/arrow/c_glib/test/test-decimal128-data-type.rb new file mode 100644 index 000000000..b27e1cad1 --- /dev/null +++ b/src/arrow/c_glib/test/test-decimal128-data-type.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDecimal128DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Decimal128DataType.new(2, 0) + assert_equal(Arrow::Type::DECIMAL128, data_type.id) + end + + def test_name + data_type = Arrow::Decimal128DataType.new(2, 0) + assert_equal("decimal128", data_type.name) + end + + def test_to_s + data_type = Arrow::Decimal128DataType.new(2, 0) + assert_equal("decimal128(2, 0)", data_type.to_s) + end + + def test_precision + data_type = Arrow::Decimal128DataType.new(8, 2) + assert_equal(8, data_type.precision) + end + + def test_scale + data_type = Arrow::Decimal128DataType.new(8, 2) + assert_equal(2, data_type.scale) + end +end diff --git a/src/arrow/c_glib/test/test-decimal128-scalar.rb b/src/arrow/c_glib/test/test-decimal128-scalar.rb new file mode 100644 index 000000000..380623a67 --- /dev/null +++ b/src/arrow/c_glib/test/test-decimal128-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDecimal128Scalar < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal128DataType.new(8, 2) + @value = Arrow::Decimal128.new("23423445") + @scalar = Arrow::Decimal128Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Decimal128Scalar.new(@data_type, @value), + @scalar) + end + + def test_to_s + assert_equal("234234.45", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-decimal128.rb b/src/arrow/c_glib/test/test-decimal128.rb new file mode 100644 index 000000000..8f14cfbe5 --- /dev/null +++ b/src/arrow/c_glib/test/test-decimal128.rb @@ -0,0 +1,233 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Verifies construction, formatting, arithmetic, comparison and
# rescaling of Arrow::Decimal128.
class TestDecimal128 < Test::Unit::TestCase
  include Helper::Omittable

  # A copy compares equal to its source.
  def test_copy
    source = Arrow::Decimal128.new("234.23445")
    assert_equal(source, source.copy)
  end

  # Formatting with a scale of 5 inserts the decimal point.
  def test_to_string_scale
    raw = 23423445
    formatted = Arrow::Decimal128.new(raw).to_string_scale(5)
    assert_equal("234.23445", formatted)
  end

  # A 38-digit value round-trips through its string form.
  def test_to_string
    digits = "99999999999999999999999999999999999999"
    assert_equal(digits, Arrow::Decimal128.new(digits).to_s)
  end

  # The byte form equals two packed 64-bit words: 123 then 0.
  def test_to_bytes
    expected = [123, 0].pack("q*")
    bytes = Arrow::Decimal128.new("12.3").to_bytes
    assert_equal(expected, bytes.to_s)
  end

  # The test expects #abs to update the receiver itself.
  def test_abs
    magnitude = "23049223942343532412"
    decimal = Arrow::Decimal128.new("-#{magnitude}")
    decimal.abs
    assert_equal(magnitude, decimal.to_s)
  end

  # Negating twice restores the starting value; the receiver is updated.
  def test_negate
    positive = "23049223942343532412"
    negative = "-23049223942343532412"
    decimal = Arrow::Decimal128.new(positive)
    decimal.negate
    assert_equal(negative, decimal.to_s)
    decimal.negate
    assert_equal(positive, decimal.to_s)
  end

  # An integer round-trips through #to_i.
  def test_to_integer
    raw = 999999999999999999
    assert_equal(raw, Arrow::Decimal128.new(raw).to_i)
  end

  # #plus matches Ruby integer addition.
  def test_plus
    left = 23423445
    right = 5443
    sum = Arrow::Decimal128.new(left).plus(Arrow::Decimal128.new(right))
    assert_equal(left + right, sum.to_i)
  end

  # #minus matches Ruby integer subtraction.
  def test_minus
    left = 23423445
    right = 5443
    difference =
      Arrow::Decimal128.new(left).minus(Arrow::Decimal128.new(right))
    assert_equal(left - right, difference.to_i)
  end

  # #multiply matches Ruby integer multiplication.
  def test_multiply
    left = 23423445
    right = 5443
    product =
      Arrow::Decimal128.new(left).multiply(Arrow::Decimal128.new(right))
    assert_equal(left * right, product.to_i)
  end

  # #divide returns a truncated quotient and a remainder.
  def test_divide
    # Presumably omits the test on older GI bindings; defined in
    # Helper::Omittable, which is not visible here.
    require_gi_bindings(3, 3, 0)
    dividend = 23423445
    divisor = -5443
    quotient, remainder =
      Arrow::Decimal128.new(dividend).divide(Arrow::Decimal128.new(divisor))
    expected = [
      dividend.quo(divisor).truncate,
      dividend.remainder(divisor),
    ]
    assert_equal(expected, [quotient.to_i, remainder.to_i])
  end

  # Dividing by zero raises Arrow::Error::Invalid with a fixed message.
  def test_divide_zero
    require_gi_bindings(3, 3, 0)
    dividend = Arrow::Decimal128.new(23423445)
    zero = Arrow::Decimal128.new(0)
    expected_error = Arrow::Error::Invalid.new(
      "[decimal128][divide]: Invalid: Division by 0 in Decimal128"
    )
    assert_raise(expected_error) do
      dividend.divide(zero)
    end
  end

  # == is true for the same value and false for a different one.
  def test_equal
    ten = Arrow::Decimal128.new(10)
    another_ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    assert_equal([true, false],
                 [ten == another_ten, ten == eleven])
  end

  # != is the inverse of ==.
  def test_not_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal128.new(10)
    another_ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    assert_equal([false, true],
                 [ten != another_ten, ten != eleven])
  end

  # < against a larger value, a smaller value, and the receiver itself.
  def test_less_than
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    nine = Arrow::Decimal128.new(9)
    assert_equal([true, false, false],
                 [ten < eleven, ten < nine, ten < ten])
  end

  # <= against a larger value, a smaller value, and the receiver itself.
  def test_less_than_or_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    nine = Arrow::Decimal128.new(9)
    assert_equal([true, false, true],
                 [ten <= eleven, ten <= nine, ten <= ten])
  end

  # > against a larger value, a smaller value, and the receiver itself.
  def test_greater_than
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    nine = Arrow::Decimal128.new(9)
    assert_equal([false, true, false],
                 [ten > eleven, ten > nine, ten > ten])
  end

  # >= against a larger value, a smaller value, and the receiver itself.
  def test_greater_than_or_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal128.new(10)
    eleven = Arrow::Decimal128.new(11)
    nine = Arrow::Decimal128.new(9)
    assert_equal([false, true, true],
                 [ten >= eleven, ten >= nine, ten >= ten])
  end

  # Rescaling from scale 1 to scale 3 turns 10 into 1000.
  def test_rescale
    rescaled = Arrow::Decimal128.new(10).rescale(1, 3)
    assert_equal(Arrow::Decimal128.new(1000), rescaled)
  end

  # Rescaling from scale 1 to scale -1 is rejected as lossy.
  def test_rescale_fail
    decimal = Arrow::Decimal128.new(10)
    expected_error = Arrow::Error::Invalid.new(
      "[decimal128][rescale]: Invalid: " \
      "Rescaling Decimal128 value would cause data loss"
    )
    assert_raise(expected_error) do
      decimal.rescale(1, -1)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-decimal256-array.rb b/src/arrow/c_glib/test/test-decimal256-array.rb
# new file mode 100644
# index 000000000..766f1a71a
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-decimal256-array.rb
# @@ -0,0 +1,37 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies value access on an array built by Arrow::Decimal256ArrayBuilder.
class TestDecimal256Array < Test::Unit::TestCase
  # #format_value applies the scale (2) of the array's data type.
  def test_format_value
    builder =
      Arrow::Decimal256ArrayBuilder.new(Arrow::Decimal256DataType.new(8, 2))
    builder.append_value(Arrow::Decimal256.new("23423445"))
    built = builder.finish
    assert_equal("234234.45", built.format_value(0))
  end

  # #get_value returns the decimal; the scale is applied explicitly here.
  def test_value
    builder =
      Arrow::Decimal256ArrayBuilder.new(Arrow::Decimal256DataType.new(8, 2))
    builder.append_value(Arrow::Decimal256.new("23423445"))
    built = builder.finish
    scale = built.value_data_type.scale
    assert_equal("234234.45",
                 built.get_value(0).to_string_scale(scale))
  end
end
# diff --git a/src/arrow/c_glib/test/test-decimal256-data-type.rb b/src/arrow/c_glib/test/test-decimal256-data-type.rb
# new file mode 100644
# index 000000000..596c3dab9
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-decimal256-data-type.rb
# @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies the metadata reported by Arrow::Decimal256DataType.
class TestDecimal256DataType < Test::Unit::TestCase
  # The type ID is the DECIMAL256 member of Arrow::Type.
  def test_type
    assert_equal(Arrow::Type::DECIMAL256,
                 Arrow::Decimal256DataType.new(2, 0).id)
  end

  # The name carries neither precision nor scale.
  def test_name
    assert_equal("decimal256",
                 Arrow::Decimal256DataType.new(2, 0).name)
  end

  # The string form appends "(precision, scale)" to the name.
  def test_to_s
    assert_equal("decimal256(2, 0)",
                 Arrow::Decimal256DataType.new(2, 0).to_s)
  end

  # The first constructor argument is read back as the precision.
  def test_precision
    assert_equal(8, Arrow::Decimal256DataType.new(8, 2).precision)
  end

  # The second constructor argument is read back as the scale.
  def test_scale
    assert_equal(2, Arrow::Decimal256DataType.new(8, 2).scale)
  end
end
# diff --git a/src/arrow/c_glib/test/test-decimal256-scalar.rb b/src/arrow/c_glib/test/test-decimal256-scalar.rb
# new file mode 100644
# index 000000000..2c419940d
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-decimal256-scalar.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies Arrow::Decimal256Scalar built from a decimal256(8, 2) type.
class TestDecimal256Scalar < Test::Unit::TestCase
  # Builds the scalar under test from the raw value "23423445".
  def setup
    @data_type = Arrow::Decimal256DataType.new(8, 2)
    @value = Arrow::Decimal256.new("23423445")
    @scalar = Arrow::Decimal256Scalar.new(@data_type, @value)
  end

  # The scalar reports the data type it was built with.
  def test_data_type
    actual = @scalar.data_type
    assert_equal(@data_type, actual)
  end

  # A scalar built from a value is valid.
  def test_valid?
    assert { @scalar.valid? }
  end

  # Two scalars with the same type and value compare equal.
  def test_equal
    twin = Arrow::Decimal256Scalar.new(@data_type, @value)
    assert_equal(twin, @scalar)
  end

  # The string form applies the type's scale of 2.
  def test_to_s
    formatted = @scalar.to_s
    assert_equal("234234.45", formatted)
  end

  # The wrapped decimal is returned unchanged.
  def test_value
    actual = @scalar.value
    assert_equal(@value, actual)
  end
end
# diff --git a/src/arrow/c_glib/test/test-decimal256.rb b/src/arrow/c_glib/test/test-decimal256.rb
# new file mode 100644
# index 000000000..d422aef33
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-decimal256.rb
# @@ -0,0 +1,220 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies construction, formatting, arithmetic, comparison and
# rescaling of Arrow::Decimal256.
class TestDecimal256 < Test::Unit::TestCase
  include Helper::Omittable

  # A copy compares equal to its source.
  def test_copy
    source = Arrow::Decimal256.new("234.23445")
    assert_equal(source, source.copy)
  end

  # Formatting with a scale of 5 inserts the decimal point.
  def test_to_string_scale
    raw = 23423445
    formatted = Arrow::Decimal256.new(raw).to_string_scale(5)
    assert_equal("234.23445", formatted)
  end

  # A 38-digit value round-trips through its string form.
  def test_to_string
    digits = "99999999999999999999999999999999999999"
    assert_equal(digits, Arrow::Decimal256.new(digits).to_s)
  end

  # The byte form equals four packed 64-bit words: 123 then three zeros.
  def test_to_bytes
    expected = [123, 0, 0, 0].pack("q*")
    bytes = Arrow::Decimal256.new("12.3").to_bytes
    assert_equal(expected, bytes.to_s)
  end

  # The test expects #abs to update the receiver itself.
  def test_abs
    magnitude = "23049223942343532412"
    decimal = Arrow::Decimal256.new("-#{magnitude}")
    decimal.abs
    assert_equal(magnitude, decimal.to_s)
  end

  # Negating twice restores the starting value; the receiver is updated.
  def test_negate
    positive = "23049223942343532412"
    negative = "-23049223942343532412"
    decimal = Arrow::Decimal256.new(positive)
    decimal.negate
    assert_equal(negative, decimal.to_s)
    decimal.negate
    assert_equal(positive, decimal.to_s)
  end

  # #plus matches Ruby integer addition (compared as strings).
  def test_plus
    left = 23423445
    right = 5443
    sum = Arrow::Decimal256.new(left).plus(Arrow::Decimal256.new(right))
    assert_equal((left + right).to_s, sum.to_s)
  end

  # #multiply matches Ruby integer multiplication (compared as strings).
  def test_multiply
    left = 23423445
    right = 5443
    product =
      Arrow::Decimal256.new(left).multiply(Arrow::Decimal256.new(right))
    assert_equal((left * right).to_s, product.to_s)
  end

  # #divide returns a truncated quotient and a remainder.
  def test_divide
    # Presumably omits the test on older GI bindings; defined in
    # Helper::Omittable, which is not visible here.
    require_gi_bindings(3, 3, 0)
    dividend = 23423445
    divisor = -5443
    quotient, remainder =
      Arrow::Decimal256.new(dividend).divide(Arrow::Decimal256.new(divisor))
    expected = [
      dividend.quo(divisor).truncate.to_s,
      dividend.remainder(divisor).to_s,
    ]
    assert_equal(expected, [quotient.to_s, remainder.to_s])
  end

  # Dividing by zero raises Arrow::Error::Invalid with a fixed message.
  def test_divide_zero
    require_gi_bindings(3, 3, 0)
    dividend = Arrow::Decimal256.new(23423445)
    zero = Arrow::Decimal256.new(0)
    expected_error = Arrow::Error::Invalid.new(
      "[decimal256][divide]: Invalid: Division by 0 in Decimal256"
    )
    assert_raise(expected_error) do
      dividend.divide(zero)
    end
  end

  # == is true for the same value and false for a different one.
  def test_equal
    ten = Arrow::Decimal256.new(10)
    another_ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    assert_equal([true, false],
                 [ten == another_ten, ten == eleven])
  end

  # != is the inverse of ==.
  def test_not_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal256.new(10)
    another_ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    assert_equal([false, true],
                 [ten != another_ten, ten != eleven])
  end

  # < against a larger value, a smaller value, and the receiver itself.
  def test_less_than
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    nine = Arrow::Decimal256.new(9)
    assert_equal([true, false, false],
                 [ten < eleven, ten < nine, ten < ten])
  end

  # <= against a larger value, a smaller value, and the receiver itself.
  def test_less_than_or_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    nine = Arrow::Decimal256.new(9)
    assert_equal([true, false, true],
                 [ten <= eleven, ten <= nine, ten <= ten])
  end

  # > against a larger value, a smaller value, and the receiver itself.
  def test_greater_than
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    nine = Arrow::Decimal256.new(9)
    assert_equal([false, true, false],
                 [ten > eleven, ten > nine, ten > ten])
  end

  # >= against a larger value, a smaller value, and the receiver itself.
  def test_greater_than_or_equal
    require_gi_bindings(3, 3, 1)
    ten = Arrow::Decimal256.new(10)
    eleven = Arrow::Decimal256.new(11)
    nine = Arrow::Decimal256.new(9)
    assert_equal([false, true, true],
                 [ten >= eleven, ten >= nine, ten >= ten])
  end

  # Rescaling from scale 1 to scale 3 turns 10 into 1000.
  def test_rescale
    rescaled = Arrow::Decimal256.new(10).rescale(1, 3)
    assert_equal(Arrow::Decimal256.new(1000), rescaled)
  end

  # Rescaling from scale 1 to scale -1 is rejected as lossy.
  def test_rescale_fail
    decimal = Arrow::Decimal256.new(10)
    expected_error = Arrow::Error::Invalid.new(
      "[decimal256][rescale]: Invalid: " \
      "Rescaling Decimal256 value would cause data loss"
    )
    assert_raise(expected_error) do
      decimal.rescale(1, -1)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-dense-union-array.rb b/src/arrow/c_glib/test/test-dense-union-array.rb
# new file mode 100644
# index 000000000..ecd17d5a0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-dense-union-array.rb
# @@ -0,0 +1,88 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Verifies both Arrow::DenseUnionArray constructors: one that derives the
# data type from the children and one that takes an explicit data type.
class TestDenseUnionArray < Test::Unit::TestCase
  include Helper::Buildable

  sub_test_case(".new") do
    sub_test_case("default") do
      # Builds a union of an int16 child and a string child without
      # passing a data type.
      def setup
        children = [
          build_int16_array([1, nil]),
          build_string_array(["a", "b", "c"]),
        ]
        @array = Arrow::DenseUnionArray.new(build_int8_array([0, 1, 0, 1, 1]),
                                            build_int32_array([0, 0, 1, 1, 2]),
                                            children)
      end

      # The expected type names its fields "0" and "1" with codes 0 and 1.
      def test_value_data_type
        expected_fields = [
          Arrow::Field.new("0", Arrow::Int16DataType.new),
          Arrow::Field.new("1", Arrow::StringDataType.new),
        ]
        expected = Arrow::DenseUnionDataType.new(expected_fields, [0, 1])
        assert_equal(expected, @array.value_data_type)
      end

      # Each child array is returned by its position.
      def test_field
        expected = [
          build_int16_array([1, nil]),
          build_string_array(["a", "b", "c"]),
        ]
        actual = [@array.get_field(0), @array.get_field(1)]
        assert_equal(expected, actual)
      end
    end

    sub_test_case("DataType") do
      # Builds the same children under an explicit data type that uses
      # type codes 11 and 13.
      def setup
        named_fields = [
          Arrow::Field.new("number", Arrow::Int16DataType.new),
          Arrow::Field.new("text", Arrow::StringDataType.new),
        ]
        @data_type = Arrow::DenseUnionDataType.new(named_fields, [11, 13])
        children = [
          build_int16_array([1, nil]),
          build_string_array(["a", "b", "c"]),
        ]
        @array = Arrow::DenseUnionArray.new(@data_type,
                                            build_int8_array([11, 13, 11, 13, 13]),
                                            build_int32_array([0, 0, 1, 1, 2]),
                                            children)
      end

      # The array reports the data type it was built with.
      def test_value_data_type
        actual = @array.value_data_type
        assert_equal(@data_type, actual)
      end

      # Each child array is returned by its position.
      def test_field
        expected = [
          build_int16_array([1, nil]),
          build_string_array(["a", "b", "c"]),
        ]
        actual = [@array.get_field(0), @array.get_field(1)]
        assert_equal(expected, actual)
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-dense-union-data-type.rb b/src/arrow/c_glib/test/test-dense-union-data-type.rb
# new file mode 100644
# index 000000000..71b045e06
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-dense-union-data-type.rb
# @@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or
# more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies the metadata and field access of Arrow::DenseUnionDataType.
class TestDenseUnionDataType < Test::Unit::TestCase
  # Builds a union of an int32 "number" field and a string "text" field
  # with type codes 2 and 9.
  def setup
    @number_field_data_type = Arrow::Int32DataType.new
    @text_field_data_type = Arrow::StringDataType.new
    @field_data_types = [@number_field_data_type, @text_field_data_type]
    @number_field = Arrow::Field.new("number", @number_field_data_type)
    @text_field = Arrow::Field.new("text", @text_field_data_type)
    @fields = [@number_field, @text_field]
    @data_type = Arrow::DenseUnionDataType.new(@fields, [2, 9])
  end

  # The type ID is the DENSE_UNION member of Arrow::Type.
  def test_type
    assert_equal(Arrow::Type::DENSE_UNION, @data_type.id)
  end

  # The name is the bare type name.
  def test_name
    assert_equal("dense_union", @data_type.name)
  end

  # The string form lists each field with its type and type code.
  def test_to_s
    formatted = @data_type.to_s
    assert_equal("dense_union<number: int32=2, text: string=9>", formatted)
  end

  # Fields come back in order, each carrying its data type.
  def test_fields
    expected = @fields.zip(@field_data_types)
    actual = @data_type.fields.map { |field| [field, field.data_type] }
    assert_equal(expected, actual)
  end

  # Index 0 returns the first field and its data type.
  def test_get_field
    first = @data_type.get_field(0)
    assert_equal([@fields[0], @field_data_types[0]],
                 [first, first.data_type])
  end
end
# diff --git a/src/arrow/c_glib/test/test-dense-union-scalar.rb b/src/arrow/c_glib/test/test-dense-union-scalar.rb
# new file mode 100644
# index 000000000..4a3e5c0de
# --- /dev/null
# +++
# b/src/arrow/c_glib/test/test-dense-union-scalar.rb
# @@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies Arrow::DenseUnionScalar holding an int8 value under type code 2.
class TestDenseUnionScalar < Test::Unit::TestCase
  # Builds a union of int8 "number" (code 2) and string "text" (code 9),
  # then a scalar selecting the "number" member with value -29.
  def setup
    union_fields = [
      Arrow::Field.new("number", Arrow::Int8DataType.new),
      Arrow::Field.new("text", Arrow::StringDataType.new),
    ]
    @data_type = Arrow::DenseUnionDataType.new(union_fields, [2, 9])
    @type_code = 2
    @value = Arrow::Int8Scalar.new(-29)
    @scalar = Arrow::DenseUnionScalar.new(@data_type, @type_code, @value)
  end

  # The scalar reports the type code it was built with.
  def test_type_code
    actual = @scalar.type_code
    assert_equal(@type_code, actual)
  end

  # The scalar reports the data type it was built with.
  def test_data_type
    actual = @scalar.data_type
    assert_equal(@data_type, actual)
  end

  # A scalar built from a value is valid.
  def test_valid?
    assert { @scalar.valid? }
  end

  # Two scalars with the same type, code and value compare equal.
  def test_equal
    twin = Arrow::DenseUnionScalar.new(@data_type, @type_code, @value)
    assert_equal(twin, @scalar)
  end

  # The string form names the selected field, its type and its value.
  def test_to_s
    formatted = @scalar.to_s
    assert_equal("union{number: int8 = -29}", formatted)
  end

  # The wrapped scalar is returned unchanged.
  def test_value
    actual = @scalar.value
    assert_equal(@value, actual)
  end
end
# diff --git a/src/arrow/c_glib/test/test-dictionary-array-builder.rb b/src/arrow/c_glib/test/test-dictionary-array-builder.rb
# new file mode 100644
# index 000000000..4531e44f4
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-dictionary-array-builder.rb
# @@ -0,0 +1,395 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Verifies the binary and string dictionary array builders: appending
# values, arrays, indices, nulls and empty values, and the finish/reset
# family of methods.
class TestDictinaryArrayBuilder < Test::Unit::TestCase
  include Helper::Buildable

  # Shared input: repeated strings with a single nil in the middle.
  def setup
    @values = ["foo", "bar", "foo", nil, "foo", "baz", "bar", "baz", "baz"]
  end

  sub_test_case("BinaryDictionaryArrayBuilder") do
    sub_test_case("constructed from empty") do
      # Feeds @values into a fresh builder and prepares the array that
      # #finish is expected to return.
      def setup
        super

        @dictionary = ["foo", "bar", "baz"]
        @dictionary_array = build_binary_array(@dictionary)
        # A nil input maps to a nil index.
        @indices = @values.map { |value| value && @dictionary.index(value) }
        @indices_array = build_int8_array(@indices)
        @data_type =
          Arrow::DictionaryDataType.new(@indices_array.value_data_type,
                                        @dictionary_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(@data_type,
                                                     @indices_array,
                                                     @dictionary_array)
        @builder = Arrow::BinaryDictionaryArrayBuilder.new
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_value_bytes(value)
          end
        end
      end

      test("append_value") do
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     build_int8_array(@indices + [3]),
                                     build_binary_array(@dictionary + ["qux"]))

        @builder.append_value("qux")
        assert_equal(expected, @builder.finish)
      end

      test("append_value_bytes") do
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     build_int8_array(@indices + [3]),
                                     build_binary_array(@dictionary + ["qux"]))

        @builder.append_value_bytes("qux")
        assert_equal(expected, @builder.finish)
      end

      test("append_array") do
        expected_indices = build_int8_array(@indices + [3, 0, nil, 2])
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     expected_indices,
                                     build_binary_array(@dictionary + ["qux"]))

        @builder.append_array(build_binary_array(["qux", "foo", nil, "baz"]))
        assert_equal(expected, @builder.finish)
      end

      test("append_indices") do
        # "qux" is registered first so that index 3 is present in the
        # dictionary before raw indices are appended.
        @builder.insert_memo_values(build_binary_array(["qux"]))
        expected_indices =
          build_int8_array(@indices + [1, 2, nil, 3, 0, 1, 2, 1, 3, 0])
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     expected_indices,
                                     build_binary_array(@dictionary + ["qux"]))

        # The expected indices hold nil where the second argument is false.
        @builder.append_indices([1, 2, 1, 3, 0],
                                [true, true, false, true, true])
        @builder.append_indices([1, 2, 1, 3, 0])
        assert_equal(expected, @builder.finish)
      end

      test("append_nulls") do
        empty_dictionary = build_binary_array([])
        null_indices = build_int8_array([nil, nil, nil])
        type = Arrow::DictionaryDataType.new(null_indices.value_data_type,
                                             empty_dictionary.value_data_type,
                                             false)
        expected = Arrow::DictionaryArray.new(type,
                                              null_indices,
                                              empty_dictionary)
        fresh_builder = Arrow::BinaryDictionaryArrayBuilder.new
        fresh_builder.append_nulls(3)
        assert_equal(expected, fresh_builder.finish)
      end

      test("append_empty_values") do
        hello_dictionary = build_binary_array(["hello"])
        zero_indices = build_int8_array([0, 0, 0, 0])
        type = Arrow::DictionaryDataType.new(zero_indices.value_data_type,
                                             hello_dictionary.value_data_type,
                                             false)
        expected = Arrow::DictionaryArray.new(type,
                                              zero_indices,
                                              hello_dictionary)
        fresh_builder = Arrow::BinaryDictionaryArrayBuilder.new
        fresh_builder.append_value("hello")
        fresh_builder.append_empty_value
        fresh_builder.append_empty_values(2)
        assert_equal(expected, fresh_builder.finish)
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end

      test("finish_delta") do
        expected = [true, @indices_array, @dictionary_array]
        assert_equal(expected, @builder.finish_delta)
      end

      test("reset") do
        # The expected result keeps the dictionary but has no indices.
        expected = {
          dictionary_length: @dictionary.length,
          array: Arrow::DictionaryArray.new(@data_type,
                                            build_int8_array([]),
                                            @dictionary_array),
        }
        @builder.reset
        actual = {
          dictionary_length: @builder.dictionary_length,
          array: @builder.finish,
        }
        assert_equal(expected, actual)
      end

      test("reset_full") do
        # The expected result has neither dictionary entries nor indices.
        expected = {
          dictionary_length: 0,
          array: Arrow::DictionaryArray.new(@data_type,
                                            build_int8_array([]),
                                            build_binary_array([])),
        }
        @builder.reset_full
        actual = {
          dictionary_length: @builder.dictionary_length,
          array: @builder.finish,
        }
        assert_equal(expected, actual)
      end
    end

    sub_test_case("constructed with memo values") do
      # Pre-loads the dictionary (including the unused "qux") before
      # feeding @values into the builder.
      def setup
        super

        @dictionary = ["qux", "foo", "bar", "baz"]
        memo_array = build_binary_array(@dictionary)
        index_values = @values.map { |value| value && @dictionary.index(value) }
        index_array = build_int8_array(index_values)
        type = Arrow::DictionaryDataType.new(index_array.value_data_type,
                                             memo_array.value_data_type,
                                             false)
        @expected_array = Arrow::DictionaryArray.new(type,
                                                     index_array,
                                                     memo_array)

        @builder = Arrow::BinaryDictionaryArrayBuilder.new
        @builder.insert_memo_values(memo_array)
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_value_bytes(value)
          end
        end
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end
    end
  end

  sub_test_case("StringDictionaryArrayBuilder") do
    sub_test_case("constructed from empty") do
      # Feeds @values into a fresh builder and prepares the array that
      # #finish is expected to return.
      def setup
        super

        @dictionary = ["foo", "bar", "baz"]
        @dictionary_array = build_string_array(@dictionary)
        # A nil input maps to a nil index.
        @indices = @values.map { |value| value && @dictionary.index(value) }
        @indices_array = build_int8_array(@indices)
        @data_type =
          Arrow::DictionaryDataType.new(@indices_array.value_data_type,
                                        @dictionary_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(@data_type,
                                                     @indices_array,
                                                     @dictionary_array)
        @builder = Arrow::StringDictionaryArrayBuilder.new
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_string(value)
          end
        end
      end

      test("append_string") do
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     build_int8_array(@indices + [3]),
                                     build_string_array(@dictionary + ["qux"]))

        @builder.append_string("qux")
        assert_equal(expected, @builder.finish)
      end

      test("append_array") do
        expected_indices = build_int8_array(@indices + [3, 0, nil, 2])
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     expected_indices,
                                     build_string_array(@dictionary + ["qux"]))

        @builder.append_array(build_string_array(["qux", "foo", nil, "baz"]))
        assert_equal(expected, @builder.finish)
      end

      test("append_indices") do
        # "qux" is registered first so that index 3 is present in the
        # dictionary before raw indices are appended.
        @builder.insert_memo_values(build_string_array(["qux"]))
        expected_indices =
          build_int8_array(@indices + [1, 2, nil, 3, 0, 1, 2, 1, 3, 0])
        expected =
          Arrow::DictionaryArray.new(@data_type,
                                     expected_indices,
                                     build_string_array(@dictionary + ["qux"]))

        # The expected indices hold nil where the second argument is false.
        @builder.append_indices([1, 2, 1, 3, 0],
                                [true, true, false, true, true])
        @builder.append_indices([1, 2, 1, 3, 0])
        assert_equal(expected, @builder.finish)
      end

      test("append_nulls") do
        empty_dictionary = build_string_array([])
        null_indices = build_int8_array([nil, nil, nil])
        type = Arrow::DictionaryDataType.new(null_indices.value_data_type,
                                             empty_dictionary.value_data_type,
                                             false)
        expected = Arrow::DictionaryArray.new(type,
                                              null_indices,
                                              empty_dictionary)
        fresh_builder = Arrow::StringDictionaryArrayBuilder.new
        fresh_builder.append_nulls(3)
        assert_equal(expected, fresh_builder.finish)
      end

      test("append_empty_values") do
        hello_dictionary = build_string_array(["hello"])
        zero_indices = build_int8_array([0, 0, 0, 0])
        type = Arrow::DictionaryDataType.new(zero_indices.value_data_type,
                                             hello_dictionary.value_data_type,
                                             false)
        expected = Arrow::DictionaryArray.new(type,
                                              zero_indices,
                                              hello_dictionary)
        fresh_builder = Arrow::StringDictionaryArrayBuilder.new
        fresh_builder.append_string("hello")
        fresh_builder.append_empty_value
        fresh_builder.append_empty_values(2)
        assert_equal(expected, fresh_builder.finish)
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end

      test("finish_delta") do
        expected = [true, @indices_array, @dictionary_array]
        assert_equal(expected, @builder.finish_delta)
      end

      test("reset") do
        # The expected result keeps the dictionary but has no indices.
        expected = {
          dictionary_length: @dictionary.length,
          array: Arrow::DictionaryArray.new(@data_type,
                                            build_int8_array([]),
                                            @dictionary_array),
        }
        @builder.reset
        actual = {
          dictionary_length: @builder.dictionary_length,
          array: @builder.finish,
        }
        assert_equal(expected, actual)
      end

      test("reset_full") do
        # The expected result has neither dictionary entries nor indices.
        expected = {
          dictionary_length: 0,
          array: Arrow::DictionaryArray.new(@data_type,
                                            build_int8_array([]),
                                            build_string_array([])),
        }
        @builder.reset_full
        actual = {
          dictionary_length: @builder.dictionary_length,
          array: @builder.finish,
        }
        assert_equal(expected, actual)
      end
    end

    sub_test_case("constructed with memo values") do
      # Pre-loads the dictionary (including the unused "qux") before
      # feeding @values into the builder.
      def setup
        super

        @dictionary = ["qux", "foo", "bar", "baz"]
        memo_array = build_string_array(@dictionary)
        index_values = @values.map { |value| value && @dictionary.index(value) }
        index_array = build_int8_array(index_values)
        type = Arrow::DictionaryDataType.new(index_array.value_data_type,
                                             memo_array.value_data_type,
                                             false)
        @expected_array = Arrow::DictionaryArray.new(type,
                                                     index_array,
                                                     memo_array)

        @builder = Arrow::StringDictionaryArrayBuilder.new
        @builder.insert_memo_values(memo_array)
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_string(value)
          end
        end
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-dictionary-array.rb b/src/arrow/c_glib/test/test-dictionary-array.rb
# new file mode 100644
# index 000000000..0f5157869
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-dictionary-array.rb
# @@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TestDictionaryArray < Test::Unit::TestCase + include Helper::Buildable + + def setup + @index_data_type = Arrow::Int32DataType.new + @dictionary = build_string_array(["C", "C++", "Ruby"]) + @ordered = false + @data_type = Arrow::DictionaryDataType.new(@index_data_type, + @dictionary.value_data_type, + @ordered) + end + + sub_test_case(".new") do + def test_new + indices = build_int32_array([0, 2, 2, 1, 0]) + dictionary_array = Arrow::DictionaryArray.new(@data_type, + indices, + @dictionary) + assert_equal(<<-STRING.chomp, dictionary_array.to_s) + +-- dictionary: + [ + "C", + "C++", + "Ruby" + ] +-- indices: + [ + 0, + 2, + 2, + 1, + 0 + ] + STRING + end + end + + sub_test_case("instance methods") do + def setup + super + @indices = build_int32_array([0, 2, 2, 1, 0]) + @dictionary_array = Arrow::DictionaryArray.new(@data_type, + @indices, + @dictionary) + end + + def test_indices + assert_equal(@indices, @dictionary_array.indices) + end + + def test_dictionary + assert_equal(@dictionary, @dictionary_array.dictionary) + end + + def test_dictionary_data_type + assert_equal(@data_type, + @dictionary_array.dictionary_data_type) + end + end +end diff --git a/src/arrow/c_glib/test/test-dictionary-data-type.rb b/src/arrow/c_glib/test/test-dictionary-data-type.rb new file mode 100644 index 000000000..cdc42edcd --- /dev/null +++ b/src/arrow/c_glib/test/test-dictionary-data-type.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDictionaryDataType < Test::Unit::TestCase + include Helper::Buildable + + def setup + @index_data_type = Arrow::Int32DataType.new + @value_data_type = Arrow::StringDataType.new + @ordered = true + @data_type = Arrow::DictionaryDataType.new(@index_data_type, + @value_data_type, + @ordered) + end + + def test_type + assert_equal(Arrow::Type::DICTIONARY, @data_type.id) + end + + def test_name + assert_equal("dictionary", @data_type.name) + end + + def test_to_s + assert_equal("dictionary<values=string, indices=int32, ordered=1>", + @data_type.to_s) + end + + def test_bit_width + assert_equal(32, @data_type.bit_width) + end + + def test_index_data_type + assert_equal(@index_data_type, @data_type.index_data_type) + end + + def test_value_data_type + assert_equal(@value_data_type, @data_type.value_data_type) + end + + def test_ordered? + assert do + @data_type.ordered? + end + end +end diff --git a/src/arrow/c_glib/test/test-dictionary-encode.rb b/src/arrow/c_glib/test/test-dictionary-encode.rb new file mode 100644 index 000000000..100494c9e --- /dev/null +++ b/src/arrow/c_glib/test/test-dictionary-encode.rb @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDictionaryEncode < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_int32 + array = build_int32_array([1, 3, 1, -1, -3, -1]) + assert_equal(<<-STRING.chomp, array.dictionary_encode.to_s) + +-- dictionary: + [ + 1, + 3, + -1, + -3 + ] +-- indices: + [ + 0, + 1, + 0, + 2, + 3, + 2 + ] + STRING + end + + def test_string + array = build_string_array(["Ruby", "Python", "Ruby"]) + assert_equal(<<-STRING.chomp, array.dictionary_encode.to_s) + +-- dictionary: + [ + "Ruby", + "Python" + ] +-- indices: + [ + 0, + 1, + 0 + ] + STRING + end +end diff --git a/src/arrow/c_glib/test/test-double-array.rb b/src/arrow/c_glib/test/test-double-array.rb new file mode 100644 index 000000000..4f30b59a0 --- /dev/null +++ b/src/arrow/c_glib/test/test-double-array.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDoubleArray < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_new + assert_equal(build_double_array([-1.1, 2.2, nil]), + Arrow::DoubleArray.new(3, + Arrow::Buffer.new([-1.1, 2.2].pack("d*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + builder = Arrow::DoubleArrayBuilder.new + builder.append_value(-1.1) + builder.append_value(2.2) + builder.append_value(-4.4) + array = builder.finish + assert_equal([-1.1, 2.2, -4.4].pack("d*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::DoubleArrayBuilder.new + builder.append_value(1.5) + array = builder.finish + assert_in_delta(1.5, array.get_value(0)) + end + + def test_values + require_gi_bindings(3, 1, 7) + builder = Arrow::DoubleArrayBuilder.new + builder.append_value(1.5) + builder.append_value(3) + builder.append_value(4.5) + array = builder.finish + assert_equal([1.5, 3.0, 4.5], array.values) + end + + def test_sum + array = build_float_array([1.5, 3.0, nil]) + assert_in_delta(4.5, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-double-data-type.rb b/src/arrow/c_glib/test/test-double-data-type.rb new file mode 100644 index 000000000..5a56ebeac --- /dev/null +++ b/src/arrow/c_glib/test/test-double-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDoubleDataType < Test::Unit::TestCase + def test_type + data_type = Arrow::DoubleDataType.new + assert_equal(Arrow::Type::DOUBLE, data_type.id) + end + + def test_name + data_type = Arrow::DoubleDataType.new + assert_equal("double", data_type.name) + end + + def test_to_s + data_type = Arrow::DoubleDataType.new + assert_equal("double", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-double-scalar.rb b/src/arrow/c_glib/test/test-double-scalar.rb new file mode 100644 index 000000000..eea673b41 --- /dev/null +++ b/src/arrow/c_glib/test/test-double-scalar.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDoubleScalar < Test::Unit::TestCase + def setup + @scalar = Arrow::DoubleScalar.new(1.1) + end + + def test_data_type + assert_equal(Arrow::DoubleDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + options = Arrow::EqualOptions.new + options.approx = true + assert do + @scalar.equal_options(Arrow::DoubleScalar.new(1.1), options) + end + end + + def test_to_s + assert_equal("1.1", @scalar.to_s) + end + + def test_value + assert_in_delta(1.1, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-equal-options.rb b/src/arrow/c_glib/test/test-equal-options.rb new file mode 100644 index 000000000..4ea1979a7 --- /dev/null +++ b/src/arrow/c_glib/test/test-equal-options.rb @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestEqualOptions < Test::Unit::TestCase + include Helper::Buildable + + sub_test_case("approx") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + not @options.approx? + end + @options.approx = true + assert do + @options.approx? 
+ end + end + + def test_compare + array1 = build_float_array([0.01]) + array2 = build_float_array([0.010001]) + @options.approx = true + assert do + array1.equal_options(array2, @options) + end + end + end + + sub_test_case("nans-equal") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + not @options.nans_equal? + end + @options.nans_equal = true + assert do + @options.nans_equal? + end + end + + def test_compare + array1 = build_float_array([0.1, Float::NAN, 0.2]) + array2 = build_float_array([0.1, Float::NAN, 0.2]) + @options.nans_equal = true + assert do + array1.equal_options(array2, @options) + end + end + end + + sub_test_case("absolute-tolerance") do + def setup + @options = Arrow::EqualOptions.new + end + + def test_accessor + assert do + @options.absolute_tolerance < 0.001 + end + @options.absolute_tolerance = 0.001 + assert do + @options.absolute_tolerance >= 0.001 + end + end + + def test_compare + array1 = build_float_array([0.01]) + array2 = build_float_array([0.0109]) + @options.approx = true + @options.absolute_tolerance = 0.001 + assert do + array1.equal_options(array2, @options) + end + end + end +end diff --git a/src/arrow/c_glib/test/test-execute-plan.rb b/src/arrow/c_glib/test/test-execute-plan.rb new file mode 100644 index 000000000..d698e1e31 --- /dev/null +++ b/src/arrow/c_glib/test/test-execute-plan.rb @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestExecutePlan < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def setup + @record_batch = + build_record_batch(number: build_int8_array([1, 2, 3, 4, 5]), + string: build_string_array(["a", "b", "a", "b", "a"])) + @plan = Arrow::ExecutePlan.new + @source_node_options = Arrow::SourceNodeOptions.new(@record_batch) + @source_node = @plan.build_source_node(@source_node_options) + aggregations = [ + Arrow::Aggregation.new("hash_sum", nil, "number", "sum(number)"), + Arrow::Aggregation.new("hash_count", nil, "number", "count(number)"), + ] + @aggregate_node_options = + Arrow::AggregateNodeOptions.new(aggregations, ["string"]) + @aggregate_node = @plan.build_aggregate_node(@source_node, + @aggregate_node_options) + @sink_node_options = Arrow::SinkNodeOptions.new + @sink_node = @plan.build_sink_node(@aggregate_node, + @sink_node_options) + end + + def test_start + @plan.validate + @plan.start + @plan.wait + reader = @sink_node_options.get_reader(@aggregate_node.output_schema) + assert_equal(build_table("sum(number)" => build_int64_array([9, 6]), + "count(number)" => build_int64_array([3, 2]), + "string" => build_string_array(["a", "b"])), + reader.read_all) + @plan.stop + end +end diff --git a/src/arrow/c_glib/test/test-extension-data-type.rb b/src/arrow/c_glib/test/test-extension-data-type.rb new file mode 100644 index 000000000..59c6395e9 --- /dev/null +++ b/src/arrow/c_glib/test/test-extension-data-type.rb @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestExtensionDataType < Test::Unit::TestCase + class UUIDArray < Arrow::ExtensionArray + type_register + end + + class UUIDDataType < Arrow::ExtensionDataType + type_register + + def initialize + super(storage_data_type: Arrow::FixedSizeBinaryDataType.new(16)) + end + + # TODO + # def get_extension_name_impl + # "uuid" + # end + + # TODO + # def get_array_gtype_impl + # UUIDArray.gtype + # end + end + + include Helper::Buildable + + def test_type + data_type = UUIDDataType.new + assert_equal(Arrow::Type::EXTENSION, data_type.id) + end + + def test_name + data_type = UUIDDataType.new + assert_equal("extension", data_type.name) + end + + def test_to_s + omit("gobject-introspection gem doesn't support implementing methods for GLib object yet") + data_type = UUIDDataType.new + assert_equal("extension<uuid>", data_type.to_s) + end + + def test_storage_data_type + data_type = UUIDDataType.new + assert_equal(Arrow::FixedSizeBinaryDataType.new(16), + data_type.storage_data_type) + end + + def test_extension_name + omit("gobject-introspection gem doesn't support implementing methods for GLib object yet") + data_type = UUIDDataType.new + assert_equal("uuid", data_type.extension_name) + end + + def test_wrap_array + omit("gobject-introspection gem doesn't support implementing 
methods for GLib object yet") + data_type = UUIDDataType.new + storage = build_fixed_size_binary_array(data_type.storage_data_type, + ["a" * 16, nil, "c" * 16]) + extension_array = data_type.wrap_array(storage) + assert_equal([ + UUIDArray, + storage, + ], + [ + extension_array.class, + extension_array.storage, + ]) + end + + def test_wrap_chunked_array + omit("gobject-introspection gem doesn't support implementing methods for GLib object yet") + data_type = UUIDDataType.new + storage1 = build_fixed_size_binary_array(data_type.storage_data_type, + ["a" * 16, nil]) + storage2 = build_fixed_size_binary_array(data_type.storage_data_type, + ["c" * 16]) + chunkd_array = Arrow::ChunkedArray.new([storage1, storage2]) + extension_chunked_array = data_type.wrap_chunked_array(chunked_array) + assert_equal([ + data_type, + [UUIDArray] * chunked_array.size, + ], + [ + extension_chunked_array.get_value_data_type, + extension_chunked_array.chunks.collect(&:class), + ]) + end +end diff --git a/src/arrow/c_glib/test/test-feather-file-reader.rb b/src/arrow/c_glib/test/test-feather-file-reader.rb new file mode 100644 index 000000000..a9380bb1a --- /dev/null +++ b/src/arrow/c_glib/test/test-feather-file-reader.rb @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFeatherFileReader < Test::Unit::TestCase + include Helper::Buildable + + def setup_file(table) + tempfile = Tempfile.open("arrow-feather-file-reader") + output = Arrow::FileOutputStream.new(tempfile.path, false) + begin + table.write_as_feather(output) + ensure + output.close + end + + input = Arrow::MemoryMappedInputStream.new(tempfile.path) + begin + reader = Arrow::FeatherFileReader.new(input) + yield(reader) + ensure + input.close + end + end + + test("#read") do + table = build_table("message" => build_string_array(["Login"]), + "is_critical" => build_boolean_array([true])) + setup_file(table) do |reader| + assert do + reader.version >= 2 + end + assert_equal(table, reader.read) + end + end + + test("#read_indices") do + table = build_table("message" => build_string_array(["Login"]), + "is_critical" => build_boolean_array([true]), + "host" => build_string_array(["www"])) + setup_file(table) do |reader| + assert_equal(build_table("message" => build_string_array(["Login"]), + "host" => build_string_array(["www"])), + reader.read_indices([2, 0])) + end + end + + test("#read_names") do + table = build_table("message" => build_string_array(["Login"]), + "is_critical" => build_boolean_array([true]), + "host" => build_string_array(["www"])) + setup_file(table) do |reader| + assert_equal(build_table("message" => build_string_array(["Login"]), + "host" => build_string_array(["www"])), + reader.read_names(["message", "host"])) + end + end +end diff --git a/src/arrow/c_glib/test/test-field-expression.rb b/src/arrow/c_glib/test/test-field-expression.rb new file mode 100644 index 000000000..cdcfab71e --- /dev/null +++ b/src/arrow/c_glib/test/test-field-expression.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFieldExpression < Test::Unit::TestCase + def setup + @expression = Arrow::FieldExpression.new("visible") + end + + sub_test_case("#initialize") do + def test_invalid_dot_path + message = + "[field-expression][new]: Invalid: " + + "Dot path '.[' contained an unterminated index" + assert_raise(Arrow::Error::Invalid.new(message)) do + Arrow::FieldExpression.new(".[") + end + end + end + + sub_test_case("==") do + def test_true + assert_equal(Arrow::FieldExpression.new("visible"), + Arrow::FieldExpression.new(".visible")) + end + + def test_false + assert_not_equal(@expression, + Arrow::FieldExpression.new("equal")) + end + end + + def test_to_string + assert_equal("visible", @expression.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-field.rb b/src/arrow/c_glib/test/test-field.rb new file mode 100644 index 000000000..fa341de26 --- /dev/null +++ b/src/arrow/c_glib/test/test-field.rb @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestField < Test::Unit::TestCase + include Helper::Omittable + + def setup + @data_type = Arrow::BooleanDataType.new + @field = Arrow::Field.new("enabled", @data_type) + @field_with_metadata = @field.with_metadata("key1" => "value1", + "key2" => "value2") + end + + def test_export + require_gi_bindings(3, 4, 8) + c_abi_schema = @field.export + assert_equal(@field, + Arrow::Field.import(c_abi_schema)) + end + + def test_equal + assert_equal(Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("enabled", Arrow::BooleanDataType.new)) + end + + def test_name + assert_equal("enabled", @field.name) + end + + def test_data_type + assert_equal(@data_type.to_s, + @field.data_type.to_s) + end + + def test_nullable? + assert do + @field.nullable? + end + end + + def test_to_s + assert_equal("enabled: bool", @field_with_metadata.to_s) + end + + sub_test_case("#to_string_metadata") do + def test_true + assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(true)) +enabled: bool +-- metadata -- +key1: value1 +key2: value2 + FIELD + end + + def test_false + assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(false)) +enabled: bool + FIELD + end + end + + sub_test_case("#has_metadata?") do + def test_existent + assert do + @field_with_metadata.has_metadata? + end + end + + def test_nonexistent + assert do + not @field.has_metadata? 
+ end + end + end + + sub_test_case("#metadata") do + def test_existent + assert_equal({ + "key1" => "value1", + "key2" => "value2", + }, + @field_with_metadata.metadata) + end + + def test_nonexistent + assert_nil(@field.metadata) + end + end + + def test_with_metadata + field = @field_with_metadata.with_metadata("key3" => "value3") + assert_equal({"key3" => "value3"}, + field.metadata) + end + + def test_with_merged_metadata + field = @field_with_metadata.with_merged_metadata("key1" => "new-value1", + "key3" => "value3") + assert_equal({ + "key1" => "new-value1", + "key2" => "value2", + "key3" => "value3", + }, + field.metadata) + end + + def test_remove_metadata + field = @field_with_metadata.remove_metadata + assert_nil(field.metadata) + end +end diff --git a/src/arrow/c_glib/test/test-file-info.rb b/src/arrow/c_glib/test/test-file-info.rb new file mode 100644 index 000000000..e6a3a0d62 --- /dev/null +++ b/src/arrow/c_glib/test/test-file-info.rb @@ -0,0 +1,170 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFileInfo < Test::Unit::TestCase + def setup + @file_info = Arrow::FileInfo.new + end + + sub_test_case("#type") do + test("default") do + assert_equal(Arrow::FileType::UNKNOWN, + @file_info.type) + end + end + + test("#type=") do + @file_info.type = :dir + assert_equal(Arrow::FileType::DIR, + @file_info.type) + end + + sub_test_case("#path") do + test("default") do + assert_equal("", @file_info.path) + end + end + + test("#path=") do + @file_info.path = "/a/b/c.d" + assert_equal("/a/b/c.d", + @file_info.path) + end + + sub_test_case("#base_name") do + test("default") do + assert_equal("", @file_info.base_name) + end + + test("with directory") do + @file_info.path = "/a/b/c.d" + assert_equal("c.d", @file_info.base_name) + end + end + + sub_test_case("#dir_name") do + test("default") do + assert_equal("", @file_info.dir_name) + end + + test("with directory") do + @file_info.path = "/a/b/c.d" + assert_equal("/a/b", @file_info.dir_name) + end + end + + sub_test_case("#extension") do + test("default") do + assert_equal("", @file_info.extension) + end + + test("exist") do + @file_info.path = "/a/b/c.d" + assert_equal("d", @file_info.extension) + end + end + + sub_test_case("#size") do + test("default") do + assert_equal(-1, @file_info.size) + end + end + + sub_test_case("#mtime") do + test("default") do + assert_equal(-1, @file_info.mtime) + end + end + + sub_test_case("#==") do + def setup + super + @other_file_info = Arrow::FileInfo.new + end + + test("all the properties are the same") do + assert do + @file_info == @other_file_info + end + end + + test("the different type") do + @other_file_info.type = Arrow::FileType::FILE + assert do + @file_info != @other_file_info + end + end + + test("the different path") do + @other_file_info.path = "/a/b/c" + assert do + @file_info != @other_file_info + end + end + + test("the different size") do + @other_file_info.size = 42 + assert do + @file_info != @other_file_info + end + end + + test("the different mtime") do 
+ @other_file_info.mtime = Time.now.to_i + assert do + @file_info != @other_file_info + end + end + end + + sub_test_case("#file?") do + test("true") do + @file_info.type = :file + assert do + @file_info.file? + end + end + + test("false") do + @file_info.type = :dir + assert do + not @file_info.file? + end + end + end + + sub_test_case("#dir?") do + test("true") do + @file_info.type = :dir + assert do + @file_info.dir? + end + end + + test("false") do + @file_info.type = :file + assert do + not @file_info.dir? + end + end + end + + test("#to_s") do + assert_equal("FileInfo(FileType::Unknown, )", + @file_info.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-file-input-stream.rb b/src/arrow/c_glib/test/test-file-input-stream.rb new file mode 100644 index 000000000..2b43f97f5 --- /dev/null +++ b/src/arrow/c_glib/test/test-file-input-stream.rb @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFileInputStream < Test::Unit::TestCase + def setup + @data = "Hello World" + @tempfile = Tempfile.open("arrow-file-input-stream") + @tempfile.write(@data) + @tempfile.close + end + + def test_new + input = Arrow::FileInputStream.new(@tempfile.path) + begin + buffer = input.read(5) + assert_equal("Hello", buffer.data.to_s) + ensure + input.close + end + end + + def test_close + input = Arrow::FileInputStream.new(@tempfile.path) + assert do + not input.closed? + end + input.close + assert do + input.closed? + end + end + + def test_size + input = Arrow::FileInputStream.new(@tempfile.path) + begin + assert_equal(@data.bytesize, input.size) + ensure + input.close + end + end + + def test_read + input = Arrow::FileInputStream.new(@tempfile.path) + begin + buffer = input.read(5) + assert_equal("Hello", buffer.data.to_s) + ensure + input.close + end + end + + def test_read_at + input = Arrow::FileInputStream.new(@tempfile.path) + begin + buffer = input.read_at(6, 5) + assert_equal("World", buffer.data.to_s) + ensure + input.close + end + end + + def test_mode + input = Arrow::FileInputStream.new(@tempfile.path) + begin + assert_equal(Arrow::FileMode::READ, input.mode) + ensure + input.close + end + end + + def test_file_descriptor + @tempfile.open + begin + fd = @tempfile.fileno + input = Arrow::FileInputStream.new(fd) + begin + assert_equal(fd, input.file_descriptor) + ensure + input.close + end + ensure + begin + @tempfile.close + rescue + end + end + end +end diff --git a/src/arrow/c_glib/test/test-file-output-stream.rb b/src/arrow/c_glib/test/test-file-output-stream.rb new file mode 100644 index 000000000..237781ac0 --- /dev/null +++ b/src/arrow/c_glib/test/test-file-output-stream.rb @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks how Arrow::FileOutputStream.new treats a file that already
# contains "Hello", depending on its second (boolean) argument.
class TestFileOutputStream < Test::Unit::TestCase
  sub_test_case(".new") do
    def test_create
      # false: the existing content is gone after open + close.
      assert_equal("", content_after_open(false))
    end

    def test_append
      # true: the existing content is preserved.
      assert_equal("Hello", content_after_open(true))
    end
  end

  private

  # Creates a temporary file holding "Hello", opens and immediately closes
  # an Arrow::FileOutputStream on it with the given flag, and returns what
  # the file contains afterwards.
  def content_after_open(append)
    tempfile = Tempfile.open("arrow-io-file-output-stream")
    tempfile.write("Hello")
    tempfile.close
    stream = Arrow::FileOutputStream.new(tempfile.path, append)
    stream.close
    File.read(tempfile.path)
  end
end

# diff --git a/src/arrow/c_glib/test/test-file-selector.rb b/src/arrow/c_glib/test/test-file-selector.rb new file mode 100644 index 000000000..23a4ea93b --- /dev/null +++ b/src/arrow/c_glib/test/test-file-selector.rb @@ -0,0 +1,82 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Covers the default values and the setters of Arrow::FileSelector.
class TestFileSelector < Test::Unit::TestCase
  def setup
    @file_selector = Arrow::FileSelector.new
  end

  sub_test_case("#base_dir") do
    test("default") do
      assert { "" == @file_selector.base_dir }
    end
  end

  test("#base_dir=") do
    @file_selector.base_dir = "/a/b"
    assert { "/a/b" == @file_selector.base_dir }
  end

  sub_test_case("#allow_not_found?") do
    test("default") do
      assert { !@file_selector.allow_not_found? }
    end
  end

  test("#allow_not_found=") do
    @file_selector.allow_not_found = true
    assert { @file_selector.allow_not_found? }
  end

  sub_test_case("#recursive?") do
    test("default") do
      assert { false == @file_selector.recursive? }
    end
  end

  test("#recursive=") do
    @file_selector.recursive = true
    assert { true == @file_selector.recursive? }
  end

  sub_test_case("#max_recursion") do
    test("default") do
      # 2**31 - 1, i.e. 2147483647.
      assert { (2**31) - 1 == @file_selector.max_recursion }
    end
  end

  test("#max_recursion=") do
    @file_selector.max_recursion = 42
    assert { 42 == @file_selector.max_recursion }
  end
end

# diff --git a/src/arrow/c_glib/test/test-file-writer.rb b/src/arrow/c_glib/test/test-file-writer.rb new file mode 100644 index 000000000..5f9c3c4e1 --- /dev/null +++ b/src/arrow/c_glib/test/test-file-writer.rb @@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Round-trip tests for Arrow::RecordBatchFileWriter: data is written to a
# temporary file and read back with Arrow::RecordBatchFileReader.
class TestFileWriter < Test::Unit::TestCase
  include Helper::Buildable

  def test_write_record_batch
    values = [true]
    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
    schema = Arrow::Schema.new([field])
    tempfile = Tempfile.open("arrow-ipc-file-writer")

    write_file(tempfile.path, schema) do |writer|
      batch = Arrow::RecordBatch.new(schema,
                                     values.size,
                                     [build_boolean_array(values)])
      writer.write_record_batch(batch)
    end

    read_file(tempfile.path) do |reader|
      assert_equal([field.name],
                   reader.schema.fields.map(&:name))
      expected = Arrow::RecordBatch.new(schema,
                                        values.size,
                                        [build_boolean_array(values)])
      assert_equal(expected, reader.read_record_batch(0))
    end
  end

  def test_write_table
    tempfile = Tempfile.open("arrow-ipc-file-writer")
    column = build_boolean_array([true, false, true])
    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
    schema = Arrow::Schema.new([field])

    write_file(tempfile.path, schema) do |writer|
      writer.write_table(Arrow::Table.new(schema, [column]))
    end

    read_file(tempfile.path) do |reader|
      expected = Arrow::RecordBatch.new(schema, column.length, [column])
      assert_equal(expected, reader.read_record_batch(0))
    end
  end

  private

  # Opens +path+ for writing, yields a RecordBatchFileWriter for +schema+,
  # then closes the writer followed by the underlying output stream.
  def write_file(path, schema)
    output = Arrow::FileOutputStream.new(path, false)
    begin
      writer = Arrow::RecordBatchFileWriter.new(output, schema)
      begin
        yield(writer)
      ensure
        writer.close
      end
    ensure
      output.close
    end
  end

  # Opens +path+ as a memory-mapped input, yields a RecordBatchFileReader
  # on it, and always closes the input afterwards.
  def read_file(path)
    input = Arrow::MemoryMappedInputStream.new(path)
    begin
      yield(Arrow::RecordBatchFileReader.new(input))
    ensure
      input.close
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-filter.rb b/src/arrow/c_glib/test/test-filter.rb new file mode 100644 index 000000000..5ed035917 --- /dev/null +++ b/src/arrow/c_glib/test/test-filter.rb @@ -0,0 +1,247 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests #filter / #filter_chunked_array on arrays, tables, chunked arrays
# and record batches, with the default options and with :emit_null.
class TestFilter < Test::Unit::TestCase
  include Helper::Buildable

  sub_test_case("FilterOptions") do
    def test_default_null_selection_behavior
      default_options = Arrow::FilterOptions.new
      assert_equal(Arrow::FilterNullSelectionBehavior::DROP,
                   default_options.null_selection_behavior)
    end
  end

  sub_test_case("Array") do
    def setup
      @filter = build_boolean_array([false, true, true, nil])
    end

    def test_filter
      source = build_int16_array([0, 1, 0, 2])
      assert_equal(build_int16_array([1, 0]),
                   source.filter(@filter))
    end

    def test_filter_emit_null
      source = build_int16_array([0, 1, 0, 2])
      assert_equal(build_int16_array([1, 0, nil]),
                   source.filter(@filter, emit_null_options))
    end

    def test_invalid_array_length
      # Four filter values against a three-element array.
      too_long = build_boolean_array([false, true, true, false])
      assert_raise(Arrow::Error::Invalid) do
        build_int16_array([0, 1, 0]).filter(too_long)
      end
    end
  end

  sub_test_case("Table") do
    def setup
      @schema = Arrow::Schema.new([
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ])
      @table = Arrow::Table.new(@schema, [
        build_boolean_array([true, false, true]),
        build_boolean_array([false, true, true]),
      ])
    end

    def test_filter
      mask = build_boolean_array([false, true, nil])
      expected = Arrow::Table.new(@schema, [
        build_boolean_array([false]),
        build_boolean_array([true]),
      ])
      assert_equal(expected, @table.filter(mask))
    end

    def test_filter_emit_null
      mask = build_boolean_array([false, true, nil])
      expected = Arrow::Table.new(@schema, [
        build_boolean_array([false, nil]),
        build_boolean_array([true, nil]),
      ])
      assert_equal(expected, @table.filter(mask, emit_null_options))
    end

    def test_filter_chunked_array
      mask = chunked_booleans([false], [true, nil])
      expected = Arrow::Table.new(@schema, [
        build_boolean_array([false]),
        build_boolean_array([true]),
      ])
      assert_equal(expected, @table.filter_chunked_array(mask))
    end

    def test_filter_chunked_array_emit_null
      mask = chunked_booleans([false], [true, nil])
      expected = Arrow::Table.new(@schema, [
        build_boolean_array([false, nil]),
        build_boolean_array([true, nil]),
      ])
      assert_equal(expected,
                   @table.filter_chunked_array(mask, emit_null_options))
    end

    def test_invalid_array_length
      # Four filter values against a three-row table.
      too_long = build_boolean_array([false, true, true, false])
      assert_raise(Arrow::Error::Invalid) do
        @table.filter(too_long)
      end
    end
  end

  sub_test_case("ChunkedArray") do
    def setup
      @chunked_array = chunked_booleans([true, false], [true])
    end

    def test_filter
      mask = build_boolean_array([false, true, nil])
      expected = chunked_booleans([false])
      assert_equal(expected, @chunked_array.filter(mask))
    end

    def test_filter_emit_null
      mask = build_boolean_array([false, true, nil])
      expected = chunked_booleans([false], [nil])
      assert_equal(expected,
                   @chunked_array.filter(mask, emit_null_options))
    end

    def test_filter_chunked_array
      mask = chunked_booleans([false], [true, nil])
      expected = chunked_booleans([false])
      assert_equal(expected,
                   @chunked_array.filter_chunked_array(mask))
    end

    def test_filter_chunked_array_emit_null
      mask = chunked_booleans([false], [true, nil])
      expected = chunked_booleans([false], [nil])
      assert_equal(expected,
                   @chunked_array.filter_chunked_array(mask,
                                                       emit_null_options))
    end

    def test_invalid_array_length
      # Four filter values against three elements in total.
      too_long = build_boolean_array([false, true, true, false])
      assert_raise(Arrow::Error::Invalid) do
        @chunked_array.filter(too_long)
      end
    end
  end

  sub_test_case("RecordBatch") do
    def setup
      @schema = Arrow::Schema.new([
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ])
      @record_batch = Arrow::RecordBatch.new(@schema, 3, [
        build_boolean_array([true, false, true]),
        build_boolean_array([false, true, false]),
      ])
    end

    def test_filter
      mask = build_boolean_array([false, true, nil])
      expected = Arrow::RecordBatch.new(@schema, 1, [
        build_boolean_array([false]),
        build_boolean_array([true]),
      ])
      assert_equal(expected, @record_batch.filter(mask))
    end

    def test_filter_emit_null
      mask = build_boolean_array([false, true, nil])
      expected = Arrow::RecordBatch.new(@schema, 2, [
        build_boolean_array([false, nil]),
        build_boolean_array([true, nil]),
      ])
      assert_equal(expected,
                   @record_batch.filter(mask, emit_null_options))
    end

    def test_invalid_array_length
      # Four filter values against a three-row record batch.
      too_long = build_boolean_array([false, true, true, false])
      assert_raise(Arrow::Error::Invalid) do
        @record_batch.filter(too_long)
      end
    end
  end

  private

  # Returns a new Arrow::FilterOptions whose null selection behavior is
  # set to :emit_null.
  def emit_null_options
    options = Arrow::FilterOptions.new
    options.null_selection_behavior = :emit_null
    options
  end

  # Builds an Arrow::ChunkedArray with one boolean chunk per argument.
  def chunked_booleans(*chunk_values)
    chunks = chunk_values.map { |values| build_boolean_array(values) }
    Arrow::ChunkedArray.new(chunks)
  end
end

# diff --git a/src/arrow/c_glib/test/test-fixed-size-binary-array.rb b/src/arrow/c_glib/test/test-fixed-size-binary-array.rb new file mode 100644 index 000000000..29189e78a --- /dev/null +++ b/src/arrow/c_glib/test/test-fixed-size-binary-array.rb @@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::FixedSizeBinaryArray using a 4-byte-wide data type.
#
# The class was previously spelled "TestFixedSizeBinaryrray" (missing "A");
# the name now matches the class under test and the file name.
class TestFixedSizeBinaryArray < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @data_type = Arrow::FixedSizeBinaryDataType.new(4)
  end

  # Builds the array directly from raw buffers and compares it with the
  # array produced by the builder helper.
  def test_new
    args = [
      @data_type,
      3,
      # Three 4-byte values stored back to back.
      Arrow::Buffer.new("0123abcd0000"),
      # Bitmap byte 0b011: the expected array below has its first two
      # values set and the third one nil.
      Arrow::Buffer.new([0b011].pack("C*")),
      # NOTE(review): presumably the null count, with -1 meaning "compute
      # it" -- confirm against the Arrow GLib reference.
      -1,
    ]
    assert_equal(build_fixed_size_binary_array(@data_type,
                                               ["0123", "abcd", nil]),
                 Arrow::FixedSizeBinaryArray.new(*args))
  end

  def test_buffer
    array = build_fixed_size_binary_array(@data_type,
                                          ["0123", "abcd", "0000"])
    assert_equal("0123abcd0000", array.buffer.data.to_s)
  end

  def test_byte_width
    array = build_fixed_size_binary_array(@data_type, ["0123"])
    assert_equal(@data_type.byte_width, array.byte_width)
  end

  def test_value
    array = build_fixed_size_binary_array(@data_type, ["0123"])
    assert_equal("0123", array.get_value(0).to_s)
  end

  def test_values_bytes
    array = build_fixed_size_binary_array(@data_type,
                                          ["0123", "abcd", "0000"])
    assert_equal("0123abcd0000", array.values_bytes.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-fixed-size-binary-data-type.rb b/src/arrow/c_glib/test/test-fixed-size-binary-data-type.rb new file mode 100644 index 000000000..b2dfa9df6 --- /dev/null +++ b/src/arrow/c_glib/test/test-fixed-size-binary-data-type.rb @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the metadata reported by a 10-byte Arrow::FixedSizeBinaryDataType.
class TestFixedSizeBinaryDataType < Test::Unit::TestCase
  def setup
    @byte_width = 10
    @data_type = Arrow::FixedSizeBinaryDataType.new(@byte_width)
  end

  def test_type
    actual_id = @data_type.id
    assert_equal(Arrow::Type::FIXED_SIZE_BINARY, actual_id)
  end

  def test_name
    actual_name = @data_type.name
    assert_equal("fixed_size_binary", actual_name)
  end

  def test_to_s
    rendered = @data_type.to_s
    assert_equal("fixed_size_binary[10]", rendered)
  end

  def test_byte_width
    actual_width = @data_type.byte_width
    assert_equal(@byte_width, actual_width)
  end

  def test_bit_width
    expected_bits = @byte_width * 8
    assert_equal(expected_bits, @data_type.bit_width)
  end
end

# diff --git a/src/arrow/c_glib/test/test-fixed-size-binary-scalar.rb b/src/arrow/c_glib/test/test-fixed-size-binary-scalar.rb new file mode 100644 index 000000000..1a6f07035 --- /dev/null +++ b/src/arrow/c_glib/test/test-fixed-size-binary-scalar.rb @@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests Arrow::FixedSizeBinaryScalar built from a 3-byte data type and a
# buffer holding the bytes 0x03 0x01 0x02.
class TestFixedSizeBinaryScalar < Test::Unit::TestCase
  def setup
    @data_type = Arrow::FixedSizeBinaryDataType.new(3)
    @buffer = Arrow::Buffer.new("\x03\x01\x02")
    @scalar = Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer)
  end

  def test_data_type
    actual_type = @scalar.data_type
    assert_equal(@data_type, actual_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  def test_equal
    twin = Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer)
    assert_equal(twin, @scalar)
  end

  def test_to_s
    rendered = @scalar.to_s
    assert_equal("\x03\x01\x02", rendered)
  end

  def test_value
    actual_value = @scalar.value
    assert_equal(@buffer, actual_value)
  end
end

# diff --git a/src/arrow/c_glib/test/test-float-array.rb b/src/arrow/c_glib/test/test-float-array.rb new file mode 100644 index 000000000..a3e774384 --- /dev/null +++ b/src/arrow/c_glib/test/test-float-array.rb @@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::FloatArray: construction from raw buffers, buffer and
# value access, and #sum.
class TestFloatArray < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  def test_new
    values = Arrow::Buffer.new([-1.1, 2.2].pack("f*"))
    # Bitmap byte 0b011: the expected array below has its first two values
    # set and the third one nil.
    bitmap = Arrow::Buffer.new([0b011].pack("C*"))
    assert_equal(build_float_array([-1.1, 2.2, nil]),
                 Arrow::FloatArray.new(3, values, bitmap, -1))
  end

  def test_buffer
    array = float_array_from_builder(-1.1, 2.2, -4.4)
    packed = [-1.1, 2.2, -4.4].pack("f*")
    assert_equal(packed, array.buffer.data.to_s)
  end

  def test_value
    array = float_array_from_builder(1.5)
    assert_in_delta(1.5, array.get_value(0))
  end

  def test_values
    require_gi_bindings(3, 1, 7)
    # The middle value is deliberately appended as the Integer 3.
    array = float_array_from_builder(1.5, 3, 4.5)
    assert_equal([1.5, 3.0, 4.5], array.values)
  end

  sub_test_case("#sum") do
    def test_with_nil
      with_nil = build_float_array([1.5, 3.0, nil])
      assert_in_delta(4.5, with_nil.sum)
    end

    def test_empty
      empty = build_float_array([])
      assert_in_delta(0.0, empty.sum)
    end
  end

  private

  # Appends each value to a new Arrow::FloatArrayBuilder, in order, and
  # returns the finished array.
  def float_array_from_builder(*values)
    builder = Arrow::FloatArrayBuilder.new
    values.each { |value| builder.append_value(value) }
    builder.finish
  end
end

# diff --git a/src/arrow/c_glib/test/test-float-data-type.rb b/src/arrow/c_glib/test/test-float-data-type.rb new file mode 100644 index 000000000..f70a60b53 --- /dev/null +++ b/src/arrow/c_glib/test/test-float-data-type.rb @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the id, name and string form of Arrow::FloatDataType.
class TestFloatDataType < Test::Unit::TestCase
  # A fresh data type is created before every test.
  def setup
    @data_type = Arrow::FloatDataType.new
  end

  def test_type
    assert_equal(Arrow::Type::FLOAT, @data_type.id)
  end

  def test_name
    assert_equal("float", @data_type.name)
  end

  def test_to_s
    assert_equal("float", @data_type.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-float-scalar.rb b/src/arrow/c_glib/test/test-float-scalar.rb new file mode 100644 index 000000000..1b830408c --- /dev/null +++ b/src/arrow/c_glib/test/test-float-scalar.rb @@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests Arrow::FloatScalar wrapping the value 1.1.
class TestFloatScalar < Test::Unit::TestCase
  def setup
    @scalar = Arrow::FloatScalar.new(1.1)
  end

  def test_data_type
    expected_type = Arrow::FloatDataType.new
    assert_equal(expected_type, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  def test_equal
    # Compared through equal_options with approx enabled.
    approximate = Arrow::EqualOptions.new
    approximate.approx = true
    assert do
      @scalar.equal_options(Arrow::FloatScalar.new(1.1), approximate)
    end
  end

  def test_to_s
    rendered = @scalar.to_s
    assert_equal("1.1", rendered)
  end

  def test_value
    assert_in_delta(1.1, @scalar.value)
  end
end

# diff --git a/src/arrow/c_glib/test/test-function-doc.rb b/src/arrow/c_glib/test/test-function-doc.rb new file mode 100644 index 000000000..7e624a5ab --- /dev/null +++ b/src/arrow/c_glib/test/test-function-doc.rb @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the documentation object returned by Arrow::Function#doc, mostly
# for the "or" function.
class TestFunctionDoc < Test::Unit::TestCase
  def setup
    @doc = Arrow::Function.find("or").doc
  end

  def test_summary
    actual_summary = @doc.summary
    assert_equal("Logical 'or' boolean values", actual_summary)
  end

  def test_description
    # The heredoc ends with a newline; chomp drops it before comparing.
    expected = <<-DESCRIPTION.chomp
When a null is encountered in either input, a null is output.
For a different null behavior, see function "or_kleene".
    DESCRIPTION
    assert_equal(expected, @doc.description)
  end

  def test_arg_names
    actual_names = @doc.arg_names
    assert_equal(["x", "y"], actual_names)
  end

  def test_options_class_name
    cast_doc = Arrow::Function.find("cast").doc
    assert_equal("CastOptions", cast_doc.options_class_name)
  end
end

# diff --git a/src/arrow/c_glib/test/test-function.rb b/src/arrow/c_glib/test/test-function.rb new file mode 100644 index 000000000..390bed5cc --- /dev/null +++ b/src/arrow/c_glib/test/test-function.rb @@ -0,0 +1,83 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests Arrow::Function#execute with array, chunked array and scalar
# inputs, a scalar output, and explicit options.
class TestFunction < Test::Unit::TestCase
  include Helper::Buildable

  sub_test_case("#execute") do
    def test_array
      function = Arrow::Function.find("or")
      inputs = [
        build_boolean_array([true, false, false]),
        build_boolean_array([true, false, true]),
      ]
      args = inputs.map { |input| Arrow::ArrayDatum.new(input) }
      assert_equal(build_boolean_array([true, false, true]),
                   function.execute(args).value)
    end

    def test_chunked_array
      function = Arrow::Function.find("or")
      left = Arrow::ChunkedArray.new([
        build_boolean_array([true]),
        build_boolean_array([false, false]),
      ])
      right = Arrow::ChunkedArray.new([
        build_boolean_array([true, false]),
        build_boolean_array([true]),
      ])
      args = [left, right].map do |chunked_array|
        Arrow::ChunkedArrayDatum.new(chunked_array)
      end
      expected = Arrow::ChunkedArray.new([
        build_boolean_array([true, false, true]),
      ])
      assert_equal(expected,
                   function.execute(args).value)
    end

    def test_input_scalar
      function = Arrow::Function.find("add")
      args = [
        Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])),
        Arrow::ScalarDatum.new(Arrow::Int8Scalar.new(5)),
      ]
      assert_equal(build_int8_array([6, 7, 8]),
                   function.execute(args).value)
    end

    def test_output_scalar
      function = Arrow::Function.find("sum")
      args = [Arrow::ArrayDatum.new(build_int8_array([1, 2, 3]))]
      # The expected value is an Int64Scalar although the input is int8.
      assert_equal(Arrow::Int64Scalar.new(6),
                   function.execute(args).value)
    end

    def test_options
      function = Arrow::Function.find("cast")
      args = [Arrow::ArrayDatum.new(build_string_array(["1", "2", "-3"]))]
      cast_options = Arrow::CastOptions.new
      cast_options.to_data_type = Arrow::Int8DataType.new
      assert_equal(build_int8_array([1, 2, -3]),
                   function.execute(args, cast_options).value)
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-gio-input-stream.rb b/src/arrow/c_glib/test/test-gio-input-stream.rb new file mode 100644 index 000000000..8cc109987
# --- /dev/null +++ b/src/arrow/c_glib/test/test-gio-input-stream.rb @@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests Arrow::GIOInputStream, which wraps a Gio input stream.
class TestGIOInputStream < Test::Unit::TestCase
  include Helper::Buildable

  # Writes one record batch with Arrow::FileOutputStream, then reads it
  # back through a GIOInputStream opened on the same path.
  def test_reader_backend
    values = [true]
    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
    schema = Arrow::Schema.new([field])
    tempfile = Tempfile.open("arrow-gio-input-stream")

    write_record_batch(tempfile.path, schema, values)

    gio_stream = Gio::File.new_for_path(tempfile.path).read
    input = Arrow::GIOInputStream.new(gio_stream)
    begin
      reader = Arrow::RecordBatchFileReader.new(input)
      assert_equal([field.name],
                   reader.schema.fields.map(&:name))
      expected = Arrow::RecordBatch.new(schema,
                                        values.size,
                                        [build_boolean_array(values)])
      assert_equal(expected, reader.read_record_batch(0))
    ensure
      input.close
    end
  end

  def test_getter
    gio_stream = Gio::MemoryInputStream.new("Hello")
    wrapper = Arrow::GIOInputStream.new(gio_stream)
    assert_equal(gio_stream, wrapper.raw)
  end

  def test_peek
    memory_stream = Gio::MemoryInputStream.new("Hello World")
    buffered_stream = Gio::BufferedInputStream.new(memory_stream)
    wrapper = Arrow::GIOInputStream.new(buffered_stream)
    # The read after the first peek still starts at "H", so peeking did
    # not consume anything.
    assert_equal("He", wrapper.peek(2).to_s)
    assert_equal("Hel", wrapper.read_bytes(3).to_s)
    assert_equal("lo ", wrapper.peek(3).to_s)
  end

  private

  # Writes a single boolean-column record batch built from +values+ to
  # +path+, closing the writer and then the output stream.
  def write_record_batch(path, schema, values)
    output = Arrow::FileOutputStream.new(path, false)
    begin
      writer = Arrow::RecordBatchFileWriter.new(output, schema)
      begin
        batch = Arrow::RecordBatch.new(schema,
                                       values.size,
                                       [build_boolean_array(values)])
        writer.write_record_batch(batch)
      ensure
        writer.close
      end
    ensure
      output.close
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-gio-output-stream.rb b/src/arrow/c_glib/test/test-gio-output-stream.rb new file mode 100644 index 000000000..36756cb00 --- /dev/null +++ b/src/arrow/c_glib/test/test-gio-output-stream.rb @@ -0,0 +1,79 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TestGIOOutputStream < Test::Unit::TestCase + include Helper::Buildable + + def test_writer_backend + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + + tempfile = Tempfile.open("arrow-gio-output-stream") + file = Gio::File.new_for_path(tempfile.path) + output_stream = file.append_to(:none) + output = Arrow::GIOOutputStream.new(output_stream) + begin + file_writer = Arrow::RecordBatchFileWriter.new(output, schema) + begin + record_batch = Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]) + file_writer.write_record_batch(record_batch) + ensure + file_writer.close + end + ensure + output.close + end + + input = Arrow::MemoryMappedInputStream.new(tempfile.path) + begin + file_reader = Arrow::RecordBatchFileReader.new(input) + assert_equal([field.name], + file_reader.schema.fields.collect(&:name)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + file_reader.read_record_batch(0)) + ensure + input.close + end + end + + def test_getter + output_stream = Gio::MemoryOutputStream.new + output = Arrow::GIOOutputStream.new(output_stream) + assert_equal(output_stream, output.raw) + end + + def test_tell + unless Gio.const_defined?(:UnixOutputStream) + omit("Need Gio::UnixOutputStream") + end + tempfile = Tempfile.open("arrow-gio-output-stream") + begin + output_stream = Gio::UnixOutputStream.new(tempfile.to_i, false) + output = Arrow::GIOOutputStream.new(output_stream) + assert_equal(0, output.tell) + output.write("Hello") + assert_equal(5, output.tell) + ensure + tempfile.close! 
+ end + end +end diff --git a/src/arrow/c_glib/test/test-int-array-builder.rb b/src/arrow/c_glib/test/test-int-array-builder.rb new file mode 100644 index 000000000..e1a6c3b21 --- /dev/null +++ b/src/arrow/c_glib/test/test-int-array-builder.rb @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestIntArrayBuilder < Test::Unit::TestCase + include Helper::Buildable + + def test_int8 + values = [-1, 2] + assert_equal(build_int_array([*values, nil]), + Arrow::Int8Array.new(3, + Arrow::Buffer.new(values.pack("c*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_int16 + border_value = (2 ** (8 - 1)) + values = [-1, border_value] + assert_equal(build_int_array([*values, nil]), + Arrow::Int16Array.new(3, + Arrow::Buffer.new(values.pack("s*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_int32 + border_value = (2 ** (16 - 1)) + values = [-1, border_value] + assert_equal(build_int_array([*values, nil]), + Arrow::Int32Array.new(3, + Arrow::Buffer.new(values.pack("l*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_int64 + border_value = (2 ** (32 - 1)) + values = [-1, border_value] + assert_equal(build_int_array([*values, nil]), + Arrow::Int64Array.new(3, + Arrow::Buffer.new(values.pack("q*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end +end diff --git a/src/arrow/c_glib/test/test-int16-array.rb b/src/arrow/c_glib/test/test-int16-array.rb new file mode 100644 index 000000000..8c159e910 --- /dev/null +++ b/src/arrow/c_glib/test/test-int16-array.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt16Array < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def test_new + assert_equal(build_int16_array([-1, 2, nil]), + Arrow::Int16Array.new(3, + Arrow::Buffer.new([-1, 2].pack("s*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + builder = Arrow::Int16ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4].pack("s*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::Int16ArrayBuilder.new + builder.append_value(-1) + array = builder.finish + assert_equal(-1, array.get_value(0)) + end + + def test_values + require_gi_bindings(3, 1, 7) + builder = Arrow::Int16ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4], array.values) + end + + def test_sum + array = build_int16_array([2, -4, nil]) + assert_equal(-2, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-int16-data-type.rb b/src/arrow/c_glib/test/test-int16-data-type.rb new file mode 100644 index 000000000..1b3d51f69 --- /dev/null +++ b/src/arrow/c_glib/test/test-int16-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt16DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Int16DataType.new + assert_equal(Arrow::Type::INT16, data_type.id) + end + + def test_name + data_type = Arrow::Int16DataType.new + assert_equal("int16", data_type.name) + end + + def test_to_s + data_type = Arrow::Int16DataType.new + assert_equal("int16", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-int16-scalar.rb b/src/arrow/c_glib/test/test-int16-scalar.rb new file mode 100644 index 000000000..1a7927140 --- /dev/null +++ b/src/arrow/c_glib/test/test-int16-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt16Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int16Scalar.new(-(2 ** 15)) + end + + def test_data_type + assert_equal(Arrow::Int16DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Int16Scalar.new(-(2 ** 15)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 15)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 15), @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-int32-array.rb b/src/arrow/c_glib/test/test-int32-array.rb new file mode 100644 index 000000000..9dff0e97b --- /dev/null +++ b/src/arrow/c_glib/test/test-int32-array.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt32Array < Test::Unit::TestCase + include Helper::Buildable + + def test_new + assert_equal(build_int32_array([-1, 2, nil]), + Arrow::Int32Array.new(3, + Arrow::Buffer.new([-1, 2].pack("l*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + builder = Arrow::Int32ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4].pack("l*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::Int32ArrayBuilder.new + builder.append_value(-1) + array = builder.finish + assert_equal(-1, array.get_value(0)) + end + + def test_values + builder = Arrow::Int32ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4], array.values) + end + + def test_sum + array = build_int32_array([2, -4, nil]) + assert_equal(-2, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-int32-data-type.rb b/src/arrow/c_glib/test/test-int32-data-type.rb new file mode 100644 index 000000000..2afb51742 --- /dev/null +++ b/src/arrow/c_glib/test/test-int32-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt32DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Int32DataType.new + assert_equal(Arrow::Type::INT32, data_type.id) + end + + def test_name + data_type = Arrow::Int32DataType.new + assert_equal("int32", data_type.name) + end + + def test_to_s + data_type = Arrow::Int32DataType.new + assert_equal("int32", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-int32-scalar.rb b/src/arrow/c_glib/test/test-int32-scalar.rb new file mode 100644 index 000000000..eba554845 --- /dev/null +++ b/src/arrow/c_glib/test/test-int32-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt32Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int32Scalar.new(-(2 ** 31)) + end + + def test_data_type + assert_equal(Arrow::Int32DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Int32Scalar.new(-(2 ** 31)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 31)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 31), @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-int64-array.rb b/src/arrow/c_glib/test/test-int64-array.rb new file mode 100644 index 000000000..f6327c7c2 --- /dev/null +++ b/src/arrow/c_glib/test/test-int64-array.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt64Array < Test::Unit::TestCase + include Helper::Buildable + + def test_new + assert_equal(build_int64_array([-1, 2, nil]), + Arrow::Int64Array.new(3, + Arrow::Buffer.new([-1, 2].pack("q*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + builder = Arrow::Int64ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4].pack("q*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::Int64ArrayBuilder.new + builder.append_value(-1) + array = builder.finish + assert_equal(-1, array.get_value(0)) + end + + def test_values + builder = Arrow::Int64ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4], array.values) + end + + def test_sum + array = build_int64_array([2, -4, nil]) + assert_equal(-2, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-int64-data-type.rb b/src/arrow/c_glib/test/test-int64-data-type.rb new file mode 100644 index 000000000..39764df5c --- /dev/null +++ b/src/arrow/c_glib/test/test-int64-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt64DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Int64DataType.new + assert_equal(Arrow::Type::INT64, data_type.id) + end + + def test_name + data_type = Arrow::Int64DataType.new + assert_equal("int64", data_type.name) + end + + def test_to_s + data_type = Arrow::Int64DataType.new + assert_equal("int64", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-int64-scalar.rb b/src/arrow/c_glib/test/test-int64-scalar.rb new file mode 100644 index 000000000..bfa7b4529 --- /dev/null +++ b/src/arrow/c_glib/test/test-int64-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt64Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int64Scalar.new(-(2 ** 63)) + end + + def test_data_type + assert_equal(Arrow::Int64DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::Int64Scalar.new(-(2 ** 63)), + @scalar) + end + + def test_to_s + assert_equal((-(2 ** 63)).to_s, @scalar.to_s) + end + + def test_value + assert_equal(-(2 ** 63), @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-int8-array.rb b/src/arrow/c_glib/test/test-int8-array.rb new file mode 100644 index 000000000..21740305b --- /dev/null +++ b/src/arrow/c_glib/test/test-int8-array.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt8Array < Test::Unit::TestCase + include Helper::Buildable + + def test_new + assert_equal(build_int8_array([-1, 2, nil]), + Arrow::Int8Array.new(3, + Arrow::Buffer.new([-1, 2].pack("c*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_buffer + builder = Arrow::Int8ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4].pack("c*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::Int8ArrayBuilder.new + builder.append_value(-1) + array = builder.finish + assert_equal(-1, array.get_value(0)) + end + + def test_values + builder = Arrow::Int8ArrayBuilder.new + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) + array = builder.finish + assert_equal([-1, 2, -4], array.values) + end + + sub_test_case("#sum") do + def test_with_null + array = build_int8_array([2, -4, nil]) + assert_equal(-2, array.sum) + end + + def test_empty + array = build_int8_array([]) + assert_equal(0, array.sum) + end + end +end diff --git a/src/arrow/c_glib/test/test-int8-data-type.rb b/src/arrow/c_glib/test/test-int8-data-type.rb new file mode 100644 index 000000000..53391a34d --- /dev/null +++ b/src/arrow/c_glib/test/test-int8-data-type.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestInt8DataType < Test::Unit::TestCase + def test_type + data_type = Arrow::Int8DataType.new + assert_equal(Arrow::Type::INT8, data_type.id) + end + + def test_signed? + data_type = Arrow::Int8DataType.new + assert do + data_type.signed? + end + end + + def test_name + data_type = Arrow::Int8DataType.new + assert_equal("int8", data_type.name) + end + + def test_to_s + data_type = Arrow::Int8DataType.new + assert_equal("int8", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-int8-scalar.rb b/src/arrow/c_glib/test/test-int8-scalar.rb new file mode 100644 index 000000000..214c59073 --- /dev/null +++ b/src/arrow/c_glib/test/test-int8-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestInt8Scalar < Test::Unit::TestCase + def setup + @scalar = Arrow::Int8Scalar.new(-128) + end + + def test_data_type + assert_equal(Arrow::Int8DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal + assert_equal(Arrow::Int8Scalar.new(-128), + @scalar) + end + + def test_to_s + assert_equal("-128", @scalar.to_s) + end + + def test_value + assert_equal(-128, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-is-in.rb b/src/arrow/c_glib/test/test-is-in.rb new file mode 100644 index 000000000..590b5e379 --- /dev/null +++ b/src/arrow/c_glib/test/test-is-in.rb @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestIsIn < Test::Unit::TestCase + include Helper::Buildable + + sub_test_case("Array") do + def test_no_null + left = build_int16_array([1, 0, 1, 2]) + right = build_int16_array([2, 0]) + assert_equal(build_boolean_array([false, true, false, true]), + left.is_in(right)) + end + + def test_null_in_left + left = build_int16_array([1, 0, nil, 2]) + right = build_int16_array([2, 0, 3]) + assert_equal(build_boolean_array([false, true, false, true]), + left.is_in(right)) + end + + def test_null_in_right + left = build_int16_array([1, 0, 1, 2]) + right = build_int16_array([2, 0, nil]) + assert_equal(build_boolean_array([false, true, false, true]), + left.is_in(right)) + end + + def test_null_in_both + left = build_int16_array([1, 0, nil, 2]) + right = build_int16_array([2, 0, nil]) + assert_equal(build_boolean_array([false, true, true, true]), + left.is_in(right)) + end + + def test_options + left = build_int16_array([1, 0, nil, 2]) + right = build_int16_array([2, 0, nil]) + is_in = Arrow::Function.find("is_in") + options = Arrow::SetLookupOptions.new(Arrow::ArrayDatum.new(right)) + assert_equal(build_boolean_array([false, true, true, true]), + is_in.execute([Arrow::ArrayDatum.new(left)], + options).value) + end + end + + sub_test_case("ChunkedArray") do + def test_no_null + left = build_int16_array([1, 0, 1, 2]) + chunks = [ + build_int16_array([1, 4]), + build_int16_array([3, 0]) + ] + right = Arrow::ChunkedArray.new(chunks) + assert_equal(build_boolean_array([true, true, true, false]), + left.is_in_chunked_array(right)) + end + + def test_null_in_left + left = build_int16_array([1, 0, nil, 2]) + chunks = [ + build_int16_array([2, 0]), + build_int16_array([3, 4]) + ] + right = Arrow::ChunkedArray.new(chunks) + assert_equal(build_boolean_array([false, true, false, true]), + left.is_in_chunked_array(right)) + end + + def test_null_in_right + left = build_int16_array([1, 0, 1, 2]) + chunks = [ + build_int16_array([2, 0]), + build_int16_array([3, nil]) + ] + right 
= Arrow::ChunkedArray.new(chunks) + assert_equal(build_boolean_array([false, true, false, true]), + left.is_in_chunked_array(right)) + end + + def test_null_in_both + left = build_int16_array([1, 0, nil, 2]) + chunks = [ + build_int16_array([2, 0]), + build_int16_array([3, nil]) + ] + right = Arrow::ChunkedArray.new(chunks) + assert_equal(build_boolean_array([false, true, true, true]), + left.is_in_chunked_array(right)) + end + + def test_options + left = build_int16_array([1, 0, nil, 2]) + chunks = [ + build_int16_array([2, 0]), + build_int16_array([3, nil]) + ] + right = Arrow::ChunkedArray.new(chunks) + is_in = Arrow::Function.find("is_in") + options = Arrow::SetLookupOptions.new(Arrow::ChunkedArrayDatum.new(right)) + assert_equal(build_boolean_array([false, true, true, true]), + is_in.execute([Arrow::ArrayDatum.new(left)], + options).value) + end + end +end diff --git a/src/arrow/c_glib/test/test-json-reader.rb b/src/arrow/c_glib/test/test-json-reader.rb new file mode 100644 index 000000000..df028a451 --- /dev/null +++ b/src/arrow/c_glib/test/test-json-reader.rb @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestJSONReader < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + sub_test_case("#read") do + def open_input(json) + buffer = Arrow::Buffer.new(json) + Arrow::BufferInputStream.new(buffer) + end + + def test_default + table = Arrow::JSONReader.new(open_input(<<-JSON)) +{ "message": "Hello", "count": 3.5, "valid": false } +{ "message": "World", "count": 3.25, "valid": true } + JSON + columns = { + "message" => build_string_array(["Hello", "World"]), + "count" => build_double_array([3.5, 3.25]), + "valid" => build_boolean_array([false, true]), + } + assert_equal(build_table(columns), + table.read) + end + + sub_test_case("unexpected-field-behavior") do + def setup + @options = Arrow::JSONReadOptions.new + field = Arrow::Field.new("message", Arrow::StringDataType.new) + schema = Arrow::Schema.new([field]) + @options.schema = schema + end + + def test_ignore + @options.unexpected_field_behavior = :ignore + table = Arrow::JSONReader.new(open_input(<<-JSON), @options) +{ "message": "Hello", "count": 3.5, "valid": false } +{ "message": "World", "count": 3.25, "valid": true } + JSON + columns = { + "message" => build_string_array(["Hello", "World"]), + } + assert_equal(build_table(columns), + table.read) + end + + def test_error + @options.unexpected_field_behavior = :error + table = Arrow::JSONReader.new(open_input(<<-JSON), @options) +{ "message": "Hello", "count": 3.5, "valid": false } +{ "message": "World", "count": 3.25, "valid": true } + JSON + assert_raise(Arrow::Error::Invalid) do + table.read + end + end + + def test_infer_type + @options.unexpected_field_behavior = :infer_type + table = Arrow::JSONReader.new(open_input(<<-JSON), @options) +{ "message": "Hello", "count": 3.5, "valid": false } +{ "message": "World", "count": 3.25, "valid": true } + JSON + columns = { + "message" => build_string_array(["Hello", "World"]), + "count" => build_double_array([3.5, 3.25]), + "valid" => build_boolean_array([false, true]), + } + 
assert_equal(build_table(columns), + table.read) + end + end + end +end diff --git a/src/arrow/c_glib/test/test-large-binary-array.rb b/src/arrow/c_glib/test/test-large-binary-array.rb new file mode 100644 index 000000000..019727fdc --- /dev/null +++ b/src/arrow/c_glib/test/test-large-binary-array.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestLargeBinaryArray < Test::Unit::TestCase + include Helper::Buildable + + def test_new + value_offsets = Arrow::Buffer.new([0, 2, 5, 5].pack("q*")) + data = Arrow::Buffer.new("\x00\x01\x02\x03\x04") + assert_equal(build_large_binary_array(["\x00\x01", "\x02\x03\x04", nil]), + Arrow::LargeBinaryArray.new(3, + value_offsets, + data, + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_value + data = "\x00\x01\x02" + builder = Arrow::LargeBinaryArrayBuilder.new + builder.append_value(data) + array = builder.finish + assert_equal(data, array.get_value(0).to_s) + end + + def test_buffer + data1 = "\x00\x01\x02" + data2 = "\x03\x04\x05" + builder = Arrow::LargeBinaryArrayBuilder.new + builder.append_value(data1) + builder.append_value(data2) + array = builder.finish + assert_equal(data1 + data2, array.buffer.data.to_s) + end + + def test_offsets_buffer + data1 = "\x00\x01" + data2 = "\x02\x03\x04" + builder = Arrow::LargeBinaryArrayBuilder.new + builder.append_value(data1) + builder.append_value(data2) + array = builder.finish + byte_per_offset = 8 + assert_equal([0, 2, 5].pack("q*"), + array.offsets_buffer.data.to_s[0, byte_per_offset * 3]) + end +end diff --git a/src/arrow/c_glib/test/test-large-binary-data-type.rb b/src/arrow/c_glib/test/test-large-binary-data-type.rb new file mode 100644 index 000000000..63983b7af --- /dev/null +++ b/src/arrow/c_glib/test/test-large-binary-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks what Arrow::LargeBinaryDataType reports about itself: its type
# id, its name and its string form.
class TestLargeBinaryDataType < Test::Unit::TestCase
  def test_type
    assert_equal(Arrow::Type::LARGE_BINARY,
                 Arrow::LargeBinaryDataType.new.id)
  end

  def test_name
    assert_equal("large_binary",
                 Arrow::LargeBinaryDataType.new.name)
  end

  def test_to_s
    assert_equal("large_binary",
                 Arrow::LargeBinaryDataType.new.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-large-binary-scalar.rb b/src/arrow/c_glib/test/test-large-binary-scalar.rb
# new file mode 100644
# index 000000000..a6bc4addb
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-large-binary-scalar.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Covers Arrow::LargeBinaryScalar wrapping a three-byte buffer: data type,
# validity, equality, string form and the wrapped value.
class TestLargeBinaryScalar < Test::Unit::TestCase
  def setup
    @buffer = Arrow::Buffer.new("\x03\x01\x02")
    @scalar = Arrow::LargeBinaryScalar.new(@buffer)
  end

  def test_data_type
    expected = Arrow::LargeBinaryDataType.new
    assert_equal(expected, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar over the same buffer compares equal to the fixture.
  def test_equal
    twin = Arrow::LargeBinaryScalar.new(@buffer)
    assert_equal(twin, @scalar)
  end

  # NOTE(review): the expected text is the literal "..."; presumably that
  # is how the binding renders binary scalars -- confirm.
  def test_to_s
    rendered = @scalar.to_s
    assert_equal("...", rendered)
  end

  def test_value
    assert_equal(@buffer, @scalar.value)
  end
end

# diff --git a/src/arrow/c_glib/test/test-large-list-array.rb b/src/arrow/c_glib/test/test-large-list-array.rb
# new file mode 100644
# index 000000000..2f7efab5a
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-large-list-array.rb
# @@ -0,0 +1,98 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::LargeListArray: construction from raw buffers and the
# per-list accessors (value, values, offsets and lengths).
class TestLargeListArray < Test::Unit::TestCase
  include Helper::Buildable

  # A list array assembled from explicit buffers must equal the one the
  # build helper produces for [[1, 2], [3, 4, 5], nil].
  def test_new
    value_field = Arrow::Field.new("value", Arrow::Int64DataType.new)
    list_type = Arrow::LargeListDataType.new(value_field)
    offsets = Arrow::Buffer.new([0, 2, 5, 5].pack("q*"))
    raw_values = Arrow::Buffer.new([1, 2, 3, 4, 5].pack("q*"))
    value_validity = Arrow::Buffer.new([0b11111].pack("C*"))
    flat_values = Arrow::Int64Array.new(5, raw_values, value_validity, 0)
    list_validity = Arrow::Buffer.new([0b011].pack("C*"))
    expected = build_large_list_array(Arrow::Int64DataType.new,
                                      [[1, 2], [3, 4, 5], nil])
    actual = Arrow::LargeListArray.new(list_type,
                                       3,
                                       offsets,
                                       flat_values,
                                       list_validity,
                                       -1)
    assert_equal(expected, actual)
  end

  # The second list is returned as an array holding -1, 0 and 1.
  def test_value
    second = sample_int8_lists.get_value(1)
    elements = (0...second.length).map { |i| second.get_value(i) }
    assert_equal([-1, 0, 1], elements)
  end

  # An empty array finished from a builder still reports its value type.
  def test_value_type
    value_field = Arrow::Field.new("value", Arrow::Int64DataType.new)
    list_type = Arrow::LargeListDataType.new(value_field)
    array = Arrow::LargeListArrayBuilder.new(list_type).finish
    assert_equal(Arrow::Int64DataType.new, array.value_type)
  end

  # #values exposes all list elements as one flat array.
  def test_values
    flat = sample_int8_lists.values
    elements = (0...flat.length).map { |i| flat.get_value(i) }
    assert_equal([-29, 29, -1, 0, 1], elements)
  end

  def test_value_offset
    lists = sample_int8_lists
    offsets = (0...lists.length).map { |i| lists.get_value_offset(i) }
    assert_equal([0, 2], offsets)
  end

  def test_value_length
    lists = sample_int8_lists
    lengths = (0...lists.length).map { |i| lists.get_value_length(i) }
    assert_equal([2, 3], lengths)
  end

  def test_value_offsets
    assert_equal([0, 2, 5], sample_int8_lists.value_offsets)
  end

  private

  # Fixture shared by the accessor tests: two int8 lists.
  def sample_int8_lists
    build_large_list_array(Arrow::Int8DataType.new,
                           [
                             [-29, 29],
                             [-1, 0, 1],
                           ])
  end
end
diff --git a/src/arrow/c_glib/test/test-large-list-data-type.rb b/src/arrow/c_glib/test/test-large-list-data-type.rb new file mode 100644 index 000000000..f06fed0a6 --- /dev/null +++ b/src/arrow/c_glib/test/test-large-list-data-type.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Checks Arrow::LargeListDataType built over a boolean field named
# "enabled": type id, name, string form and the wrapped field.
class TestLargeListDataType < Test::Unit::TestCase
  def setup
    @field_data_type = Arrow::BooleanDataType.new
    @field = Arrow::Field.new("enabled", @field_data_type)
    @data_type = Arrow::LargeListDataType.new(@field)
  end

  def test_type
    type_id = @data_type.id
    assert_equal(Arrow::Type::LARGE_LIST, type_id)
  end

  def test_name
    type_name = @data_type.name
    assert_equal("large_list", type_name)
  end

  def test_to_s
    rendered = @data_type.to_s
    assert_equal("large_list<enabled: bool>", rendered)
  end

  # Both the field and the field's data type survive the round trip.
  def test_field
    expected = [@field, @field_data_type]
    actual = [@data_type.field, @data_type.field.data_type]
    assert_equal(expected, actual)
  end
end

# diff --git a/src/arrow/c_glib/test/test-large-string-array.rb b/src/arrow/c_glib/test/test-large-string-array.rb
# new file mode 100644
# index 000000000..d77f9fee7
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-large-string-array.rb
# @@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::LargeStringArray: construction from raw buffers, string
# access and the contents of the data buffer.
class TestLargeStringArray < Test::Unit::TestCase
  include Helper::Buildable

  # An array assembled from explicit buffers must equal the one the build
  # helper produces for ["Hello", "World!", nil].
  def test_new
    offsets = Arrow::Buffer.new([0, 5, 11, 11].pack("q*"))
    characters = Arrow::Buffer.new("HelloWorld!")
    validity = Arrow::Buffer.new([0b011].pack("C*"))
    expected = build_large_string_array(["Hello", "World!", nil])
    actual = Arrow::LargeStringArray.new(3, offsets, characters, validity, -1)
    assert_equal(expected, actual)
  end

  def test_value
    array = large_string_array_of("Hello World")
    assert_equal("Hello World", array.get_string(0))
  end

  # The data buffer holds the appended strings back to back.
  def test_buffer
    array = large_string_array_of("Hello", "World")
    assert_equal("HelloWorld", array.buffer.data.to_s)
  end

  private

  # Appends every string to a fresh LargeStringArrayBuilder, in order, and
  # returns the finished array.
  def large_string_array_of(*strings)
    builder = Arrow::LargeStringArrayBuilder.new
    strings.each { |string| builder.append_string(string) }
    builder.finish
  end
end

# diff --git a/src/arrow/c_glib/test/test-large-string-data-type.rb b/src/arrow/c_glib/test/test-large-string-data-type.rb
# new file mode 100644
# index 000000000..731e27403
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-large-string-data-type.rb
# @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks what Arrow::LargeStringDataType reports about itself. The name
# ("large_utf8") and the string form ("large_string") are asserted
# separately because the expected strings differ.
class TestLargeStringDataType < Test::Unit::TestCase
  def test_type
    assert_equal(Arrow::Type::LARGE_STRING,
                 Arrow::LargeStringDataType.new.id)
  end

  def test_name
    assert_equal("large_utf8",
                 Arrow::LargeStringDataType.new.name)
  end

  def test_to_s
    assert_equal("large_string",
                 Arrow::LargeStringDataType.new.to_s)
  end
end

# diff --git a/src/arrow/c_glib/test/test-large-string-scalar.rb b/src/arrow/c_glib/test/test-large-string-scalar.rb
# new file mode 100644
# index 000000000..13e28f647
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-large-string-scalar.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Covers Arrow::LargeStringScalar wrapping the buffer "Hello": data type,
# validity, equality, string form and the wrapped value.
class TestLargeStringScalar < Test::Unit::TestCase
  def setup
    @buffer = Arrow::Buffer.new("Hello")
    @scalar = Arrow::LargeStringScalar.new(@buffer)
  end

  def test_data_type
    expected = Arrow::LargeStringDataType.new
    assert_equal(expected, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar over the same buffer compares equal to the fixture.
  def test_equal
    twin = Arrow::LargeStringScalar.new(@buffer)
    assert_equal(twin, @scalar)
  end

  # NOTE(review): the expected text is the literal "..." rather than
  # "Hello"; presumably that is the binding's rendering -- confirm.
  def test_to_s
    rendered = @scalar.to_s
    assert_equal("...", rendered)
  end

  def test_value
    assert_equal(@buffer, @scalar.value)
  end
end

# diff --git a/src/arrow/c_glib/test/test-list-array.rb b/src/arrow/c_glib/test/test-list-array.rb
# new file mode 100644
# index 000000000..f94b28dd1
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-list-array.rb
# @@ -0,0 +1,97 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::ListArray: construction from raw buffers and the
# per-list accessors (value, values, offsets and lengths).
class TestListArray < Test::Unit::TestCase
  include Helper::Buildable

  # A list array assembled from explicit buffers must equal the one the
  # build helper produces for [[1, 2], [3, 4, 5], nil].
  def test_new
    value_field = Arrow::Field.new("value", Arrow::Int8DataType.new)
    list_type = Arrow::ListDataType.new(value_field)
    offsets = Arrow::Buffer.new([0, 2, 5, 5].pack("l*"))
    raw_values = Arrow::Buffer.new([1, 2, 3, 4, 5].pack("c*"))
    value_validity = Arrow::Buffer.new([0b11111].pack("C*"))
    flat_values = Arrow::Int8Array.new(5, raw_values, value_validity, 0)
    list_validity = Arrow::Buffer.new([0b011].pack("C*"))
    expected = build_list_array(Arrow::Int8DataType.new,
                                [[1, 2], [3, 4, 5], nil])
    actual = Arrow::ListArray.new(list_type,
                                  3,
                                  offsets,
                                  flat_values,
                                  list_validity,
                                  -1)
    assert_equal(expected, actual)
  end

  # The second list is returned as an array holding -1, 0 and 1.
  def test_value
    second = sample_int8_lists.get_value(1)
    elements = (0...second.length).map { |i| second.get_value(i) }
    assert_equal([-1, 0, 1], elements)
  end

  # An empty array finished from a builder still reports its value type.
  def test_value_type
    value_field = Arrow::Field.new("value", Arrow::Int8DataType.new)
    list_type = Arrow::ListDataType.new(value_field)
    array = Arrow::ListArrayBuilder.new(list_type).finish
    assert_equal(Arrow::Int8DataType.new, array.value_type)
  end

  # #values exposes all list elements as one flat array.
  def test_values
    flat = sample_int8_lists.values
    elements = (0...flat.length).map { |i| flat.get_value(i) }
    assert_equal([-29, 29, -1, 0, 1], elements)
  end

  def test_value_offset
    lists = sample_int8_lists
    offsets = (0...lists.length).map { |i| lists.get_value_offset(i) }
    assert_equal([0, 2], offsets)
  end

  def test_value_length
    lists = sample_int8_lists
    lengths = (0...lists.length).map { |i| lists.get_value_length(i) }
    assert_equal([2, 3], lengths)
  end

  def test_value_offsets
    assert_equal([0, 2, 5], sample_int8_lists.value_offsets)
  end

  private

  # Fixture shared by the accessor tests: two int8 lists.
  def sample_int8_lists
    build_list_array(Arrow::Int8DataType.new,
                     [
                       [-29, 29],
                       [-1, 0, 1],
                     ])
  end
end

# diff --git a/src/arrow/c_glib/test/test-list-data-type.rb
# b/src/arrow/c_glib/test/test-list-data-type.rb
# new file mode 100644
# index 000000000..f9731d2fe
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-list-data-type.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks Arrow::ListDataType built over a boolean field named "enabled":
# type id, name, string form and the wrapped field.
class TestListDataType < Test::Unit::TestCase
  def setup
    @field_data_type = Arrow::BooleanDataType.new
    @field = Arrow::Field.new("enabled", @field_data_type)
    @data_type = Arrow::ListDataType.new(@field)
  end

  def test_type
    type_id = @data_type.id
    assert_equal(Arrow::Type::LIST, type_id)
  end

  def test_name
    type_name = @data_type.name
    assert_equal("list", type_name)
  end

  def test_to_s
    rendered = @data_type.to_s
    assert_equal("list<enabled: bool>", rendered)
  end

  # Both the field and the field's data type survive the round trip.
  def test_field
    expected = [@field, @field_data_type]
    actual = [@data_type.field, @data_type.field.data_type]
    assert_equal(expected, actual)
  end
end

# diff --git a/src/arrow/c_glib/test/test-list-scalar.rb b/src/arrow/c_glib/test/test-list-scalar.rb
# new file mode 100644
# index 000000000..3fda3f25b
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-list-scalar.rb
# @@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Covers Arrow::ListScalar wrapping a one-element int8 list array: data
# type, validity, equality, string form and the wrapped value.
class TestListScalar < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @value = build_list_array(Arrow::Int8DataType.new, [[1, 2, 3]])
    @scalar = Arrow::ListScalar.new(@value)
  end

  # The scalar's data type matches the wrapped array's value data type.
  def test_data_type
    expected = @value.value_data_type
    assert_equal(expected, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar over the same array compares equal to the fixture.
  def test_equal
    twin = Arrow::ListScalar.new(@value)
    assert_equal(twin, @scalar)
  end

  # NOTE(review): the expected text is the literal "..."; presumably that
  # is the binding's rendering for list scalars -- confirm.
  def test_to_s
    rendered = @scalar.to_s
    assert_equal("...", rendered)
  end

  def test_value
    wrapped = @scalar.value
    assert_equal(@value, wrapped)
  end
end

# diff --git a/src/arrow/c_glib/test/test-literal-expression.rb b/src/arrow/c_glib/test/test-literal-expression.rb
# new file mode 100644
# index 000000000..7a8796fb4
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-literal-expression.rb
# @@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Covers Arrow::LiteralExpression built from a boolean scalar datum:
# equality against other expressions and its string form.
class TestLiteralExpression < Test::Unit::TestCase
  def setup
    @scalar = Arrow::BooleanScalar.new(true)
    @datum = Arrow::ScalarDatum.new(@scalar)
    @expression = Arrow::LiteralExpression.new(@datum)
  end

  sub_test_case("==") do
    # Two literal expressions built from the same datum are equal.
    def test_true
      left = Arrow::LiteralExpression.new(@datum)
      right = Arrow::LiteralExpression.new(@datum)
      assert_equal(left, right)
    end

    # A literal expression differs from a field expression.
    def test_false
      field_expression = Arrow::FieldExpression.new("visible")
      assert_not_equal(@expression, field_expression)
    end
  end

  def test_to_string
    rendered = @expression.to_s
    assert_equal("true", rendered)
  end
end

# diff --git a/src/arrow/c_glib/test/test-local-file-system.rb b/src/arrow/c_glib/test/test-local-file-system.rb
# new file mode 100644
# index 000000000..884f2bdf5
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-local-file-system.rb
# @@ -0,0 +1,57 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
require_relative "file-system-tests"

# Runs the shared FileSystemTests against a local file system confined to
# a temporary directory, once with default options and once with mmap
# enabled.
class TestLocalFileSystem < Test::Unit::TestCase
  # Wraps each test in Dir.mktmpdir: @fs is a sub-tree view of the
  # temporary directory and the test body runs at the `yield`.
  def setup
    Dir.mktmpdir do |root|
      @fs = Arrow::SubTreeFileSystem.new(root,
                                         Arrow::LocalFileSystem.new(build_options))
      yield
    end
  end

  # Options for the local file system; the "mmap" sub test case overrides
  # this method.
  def build_options
    Arrow::LocalFileSystemOptions.new
  end

  sub_test_case("default") do
    include FileSystemTests

    # The wrapped file system reports "local", the wrapper "subtree".
    def test_type_name
      expected = ["local", "subtree"]
      actual = [@fs.base_file_system.type_name, @fs.type_name]
      assert_equal(expected, actual)
    end
  end

  sub_test_case("mmap") do
    include FileSystemTests

    # Same options as the default case, with use_mmap switched on.
    def build_options
      Arrow::LocalFileSystemOptions.new.tap do |options|
        options.use_mmap = true
      end
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-map-array-builder.rb b/src/arrow/c_glib/test/test-map-array-builder.rb
# new file mode 100644
# index 000000000..ce5023ae4
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-map-array-builder.rb
# @@ -0,0 +1,143 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::MapArrayBuilder: the different ways of appending map
# entries (key/item builders, bulk offsets, the struct value builder) as
# well as nulls and empty values. Results are compared against a
# reference Arrow::MapArray built in #setup.
class TestMapArrayBuilder < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @map_array = Arrow::MapArray.new(build_int32_array([0, 2, 5]),
                                     build_string_array(["a", "b", "c", "d", "e"]),
                                     build_int16_array([0, 1, 2, 3, 4]))
    map_type = Arrow::MapDataType.new(Arrow::StringDataType.new,
                                      Arrow::Int16DataType.new)
    @builder = Arrow::MapArrayBuilder.new(map_type)
  end

  # One #append_value per map, followed by that map's keys and then its
  # items on the dedicated key/item builders.
  def test_append_value
    keys = @builder.key_builder
    items = @builder.item_builder

    sample_entries.each do |entry_keys, entry_items|
      @builder.append_value
      entry_keys.each { |key| keys.append_string(key) }
      entry_items.each { |item| items.append_value(item) }
    end

    assert_matches_reference(@builder.finish)
  end

  # All offsets, keys and items appended in bulk.
  def test_append_values
    keys = @builder.key_builder
    items = @builder.item_builder
    @builder.append_values([0, 2, 5])
    keys.append_strings(["a", "b", "c", "d", "e"])
    items.append_values([0, 1, 2, 3, 4])

    assert_matches_reference(@builder.finish)
  end

  # Same entries as #test_append_value, but written through the struct
  # value builder: field builder 0 receives keys, field builder 1 items.
  def test_append_structs
    structs = @builder.value_builder

    sample_entries.each do |entry_keys, entry_items|
      @builder.append_value
      structs.append_value
      entry_keys.each do |key|
        structs.get_field_builder(0).append_string(key)
      end
      entry_items.each do |item|
        structs.get_field_builder(1).append_value(item)
      end
    end

    assert_matches_reference(@builder.finish)
  end

  def test_append_null
    2.times { @builder.append_null }
    assert_equal(2, @builder.finish.n_nulls)
  end

  def test_append_nulls
    @builder.append_nulls(2)
    assert_equal(2, @builder.finish.n_nulls)
  end

  def test_append_empty_value
    expected = empty_map_array([0, 0])
    @builder.append_empty_value
    assert_equal(expected, @builder.finish)
  end

  def test_append_empty_values
    expected = empty_map_array([0, 0, 0, 0])
    @builder.append_empty_values(3)
    assert_equal(expected, @builder.finish)
  end

  private

  # Keys and items of the two maps held by the reference array.
  def sample_entries
    [
      [["a", "b"], [0, 1]],
      [["c", "d", "e"], [2, 3, 4]],
    ]
  end

  # Asserts that the first two maps of +array+ equal those of the
  # reference array.
  def assert_matches_reference(array)
    expected = [@map_array.get_value(0), @map_array.get_value(1)]
    actual = [array.get_value(0), array.get_value(1)]
    assert_equal(expected, actual)
  end

  # A map array with the given offsets and no keys or items.
  def empty_map_array(offsets)
    Arrow::MapArray.new(build_int32_array(offsets),
                        build_string_array([]),
                        build_int16_array([]))
  end
end

# diff --git a/src/arrow/c_glib/test/test-map-array.rb b/src/arrow/c_glib/test/test-map-array.rb
# new file mode 100644
# index 000000000..d70746c84
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-map-array.rb
# @@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that Arrow::MapArray hands back the key and item arrays it was
# constructed from.
class TestMapArray < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    map_offsets = build_int32_array([0, 2, 5])
    @keys = build_string_array(["a", "b", "c", "d", "e"])
    @items = build_int16_array([0, 1, 2, 3, 4])
    @map_array = Arrow::MapArray.new(map_offsets, @keys, @items)
  end

  def test_keys
    actual = @map_array.keys
    assert_equal(@keys, actual)
  end

  def test_items
    actual = @map_array.items
    assert_equal(@items, actual)
  end
end

# diff --git a/src/arrow/c_glib/test/test-map-data-type.rb b/src/arrow/c_glib/test/test-map-data-type.rb
# new file mode 100644
# index 000000000..c537c43d8
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-map-data-type.rb
# @@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks Arrow::MapDataType built from a string key type and an int16
# item type: type id, name, string form and the two component types.
class TestMapDataType < Test::Unit::TestCase
  def setup
    @key_type = Arrow::StringDataType.new
    @item_type = Arrow::Int16DataType.new
    @data_type = Arrow::MapDataType.new(@key_type, @item_type)
  end

  def test_type
    type_id = @data_type.id
    assert_equal(Arrow::Type::MAP, type_id)
  end

  def test_name
    type_name = @data_type.name
    assert_equal("map", type_name)
  end

  def test_to_s
    rendered = @data_type.to_s
    assert_equal("map<string, int16>", rendered)
  end

  def test_key
    actual = @data_type.key_type
    assert_equal(@key_type, actual)
  end

  def test_item
    actual = @data_type.item_type
    assert_equal(@item_type, actual)
  end
end

# diff --git a/src/arrow/c_glib/test/test-map-scalar.rb b/src/arrow/c_glib/test/test-map-scalar.rb
# new file mode 100644
# index 000000000..9c6eb69e0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-map-scalar.rb
# @@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Covers Arrow::MapScalar wrapping a two-row struct array of key/value
# pairs: data type, validity, equality, string form and wrapped value.
class TestMapScalar < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    # The "key" field is created with `false` as its third argument; the
    # "value" field relies on the constructor's default.
    key_field = Arrow::Field.new("key",
                                 Arrow::StringDataType.new,
                                 false)
    value_field = Arrow::Field.new("value",
                                   Arrow::Int8DataType.new)
    rows = [
      {"key" => "hello", "value" => 1},
      {"key" => "world", "value" => 2},
    ]
    @value = build_struct_array([key_field, value_field], rows)
    @scalar = Arrow::MapScalar.new(@value)
  end

  # The scalar's data type matches the wrapped array's value data type.
  def test_data_type
    expected = @value.value_data_type
    assert_equal(expected, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar over the same array compares equal to the fixture.
  def test_equal
    twin = Arrow::MapScalar.new(@value)
    assert_equal(twin, @scalar)
  end

  # NOTE(review): the expected text is the literal "..."; presumably that
  # is the binding's rendering for map scalars -- confirm.
  def test_to_s
    rendered = @scalar.to_s
    assert_equal("...", rendered)
  end

  def test_value
    wrapped = @scalar.value
    assert_equal(@value, wrapped)
  end
end

# diff --git a/src/arrow/c_glib/test/test-memory-mapped-input-stream.rb b/src/arrow/c_glib/test/test-memory-mapped-input-stream.rb
# new file mode 100644
# index 000000000..7c5f933b4
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-memory-mapped-input-stream.rb
# @@ -0,0 +1,84 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::MemoryMappedInputStream against a temporary file that
# contains "Hello World": opening, closing, size, sequential and
# positional reads, and the reported file mode.
class TestMemoryMappedInputStream < Test::Unit::TestCase
  # Writes the fixture data to a fresh temporary file and closes the Ruby
  # handle so the stream under test opens the file by path.
  def setup
    @data = "Hello World"
    @tempfile = Tempfile.open("arrow-memory-mapped-input-stream")
    @tempfile.write(@data)
    @tempfile.close
  end

  # Removes the temporary file once the test is done instead of leaving
  # the cleanup to the Tempfile finalizer. The safe navigation guards
  # against a failure in #setup before @tempfile was assigned.
  def teardown
    @tempfile&.close!
  end

  def test_new
    with_input do |input|
      buffer = input.read(5)
      assert_equal("Hello", buffer.data.to_s)
    end
  end

  # The stream reports open until #close is called, and closed afterwards.
  def test_close
    input = Arrow::MemoryMappedInputStream.new(@tempfile.path)
    assert do
      not input.closed?
    end
    input.close
    assert do
      input.closed?
    end
  end

  def test_size
    with_input do |input|
      assert_equal(@data.bytesize, input.size)
    end
  end

  def test_read
    with_input do |input|
      buffer = input.read(5)
      assert_equal("Hello", buffer.data.to_s)
    end
  end

  # Positional read: five bytes from offset 6 yield "World".
  def test_read_at
    with_input do |input|
      buffer = input.read_at(6, 5)
      assert_equal("World", buffer.data.to_s)
    end
  end

  # NOTE(review): the expected mode for this input stream is READWRITE;
  # the expectation is kept exactly as written -- confirm against the
  # binding if it looks surprising.
  def test_mode
    with_input do |input|
      assert_equal(Arrow::FileMode::READWRITE, input.mode)
    end
  end

  private

  # Opens a stream on the fixture file, yields it, and always closes it
  # afterwards, even when an assertion inside the block fails.
  def with_input
    input = Arrow::MemoryMappedInputStream.new(@tempfile.path)
    begin
      yield(input)
    ensure
      input.close
    end
  end
end

# diff --git a/src/arrow/c_glib/test/test-mock-file-system.rb b/src/arrow/c_glib/test/test-mock-file-system.rb
# new file mode 100644
# index 000000000..c6148d699
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-mock-file-system.rb
# @@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require_relative "file-system-tests"

# Mixes the FileSystemTests module into a test case whose file system is
# created from the "mock://" URI.
class TestMockFileSystem < Test::Unit::TestCase
  include FileSystemTests

  # Provides the @fs instance variable with the mock file system.
  def setup
    @fs = Arrow::FileSystem.create("mock://")
  end

  def test_type_name
    type_name = @fs.type_name
    assert_equal("mock", type_name)
  end
end
# diff --git a/src/arrow/c_glib/test/test-mutable-buffer.rb b/src/arrow/c_glib/test/test-mutable-buffer.rb
# new file mode 100644
# index 000000000..ccfd6e222
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-mutable-buffer.rb
# @@ -0,0 +1,74 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::MutableBuffer created from the 5-byte string "Hello".
class TestMutableBuffer < Test::Unit::TestCase
  def setup
    @data = "Hello"
    @buffer = Arrow::MutableBuffer.new(@data)
  end

  # With bindings >= 3.2.2 the buffer's pointer is compared to the
  # GLib::Bytes pointer; older bindings only compare contents.
  def test_new_bytes
    bytes = GLib::Bytes.new(@data)
    buffer = Arrow::MutableBuffer.new(bytes)
    unless GLib.check_binding_version?(3, 2, 2)
      assert_equal(@data, buffer.mutable_data.to_s)
      return
    end
    assert_equal(bytes.pointer, buffer.mutable_data.pointer)
  end

  def test_mutable?
    assert { @buffer.mutable? }
  end

  def test_mutable_data
    content = @buffer.mutable_data.to_s
    assert_equal(@data, content)
  end

  # Slicing (offset 1, length 3) matches the same substring of the source.
  def test_slice
    sliced = @buffer.slice(1, 3)
    assert_equal(@data[1, 3], sliced.data.to_s)
  end

  sub_test_case("#set_data") do
    test("offset") do
      @buffer.set_data(1, "EL")
      assert_equal("HELlo", @buffer.data.to_s)
    end

    test("replace") do
      @buffer.set_data(0, "World")
      assert_equal("World", @buffer.data.to_s)
    end

    # Writing one byte at offset 5 overruns the 5-byte buffer.
    test("offset: too large") do
      expected_error = Arrow::Error::Invalid.new(
        "[mutable-buffer][set-data]: Data is too large: <(5 + 1) > (5)>"
      )
      assert_raise(expected_error) do
        @buffer.set_data(5, "X")
      end
    end

    # Writing six bytes at offset 0 overruns the 5-byte buffer.
    test("data too large") do
      expected_error = Arrow::Error::Invalid.new(
        "[mutable-buffer][set-data]: Data is too large: <(0 + 6) > (5)>"
      )
      assert_raise(expected_error) do
        @buffer.set_data(0, @data + "!")
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-null-array.rb b/src/arrow/c_glib/test/test-null-array.rb
# new file mode 100644
# index 000000000..6aa8c037c
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-null-array.rb
# @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::NullArray constructed with a length of 3.
class TestNullArray < Test::Unit::TestCase
  def test_length
    nulls = Arrow::NullArray.new(3)
    assert_equal(3, nulls.length)
  end

  # Every slot counts as null.
  def test_n_nulls
    nulls = Arrow::NullArray.new(3)
    assert_equal(3, nulls.n_nulls)
  end

  # Slicing (offset 1, length 2) yields an array of length 2.
  def test_slice
    nulls = Arrow::NullArray.new(3)
    sliced = nulls.slice(1, 2)
    assert_equal(2, sliced.length)
  end
end
# diff --git a/src/arrow/c_glib/test/test-null-data-type.rb b/src/arrow/c_glib/test/test-null-data-type.rb
# new file mode 100644
# index 000000000..8440b9900
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-null-data-type.rb
# @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::NullDataType: type id, name and string form.
class TestNullDataType < Test::Unit::TestCase
  def test_type
    null_type = Arrow::NullDataType.new
    assert_equal(Arrow::Type::NA, null_type.id)
  end

  def test_name
    null_type = Arrow::NullDataType.new
    assert_equal("null", null_type.name)
  end

  def test_to_s
    null_type = Arrow::NullDataType.new
    assert_equal("null", null_type.to_s)
  end
end
# diff --git a/src/arrow/c_glib/test/test-null-scalar.rb b/src/arrow/c_glib/test/test-null-scalar.rb
# new file mode 100644
# index 000000000..07b887040
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-null-scalar.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::NullScalar.
class TestNullScalar < Test::Unit::TestCase
  def setup
    @scalar = Arrow::NullScalar.new
  end

  def test_data_type
    expected = Arrow::NullDataType.new
    assert_equal(expected, @scalar.data_type)
  end

  # A null scalar reports itself as not valid.
  def test_valid?
    assert { not @scalar.valid? }
  end

  def test_equal
    other = Arrow::NullScalar.new
    assert_equal(other, @scalar)
  end

  def test_to_s
    assert_equal("null", @scalar.to_s)
  end
end
# diff --git a/src/arrow/c_glib/test/test-numeric-array.rb b/src/arrow/c_glib/test/test-numeric-array.rb
# new file mode 100644
# index 000000000..f90007c03
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-numeric-array.rb
# @@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the mean of a numeric array that contains a null.
class TestNumericArray < Test::Unit::TestCase
  include Helper::Buildable

  # The expected mean divides the summed raw values by 2, the number of
  # non-null elements.
  # NOTE(review): presumably relies on the null slot contributing 0 to the
  # sum of #values - confirm against the binding.
  def test_mean
    array = build_double_array([1.1, 2.2, nil])
    expected = array.values.reduce(:+) / 2
    assert_in_delta(expected, array.mean)
  end
end
# diff --git a/src/arrow/c_glib/test/test-orc-file-reader.rb b/src/arrow/c_glib/test/test-orc-file-reader.rb
# new file mode 100644
# index 000000000..38900cf12
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-orc-file-reader.rb
# @@ -0,0 +1,218 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ORCFileReader against the "TestOrcFile.test1.orc"
# fixture. Every test is omitted when Arrow was built without ORC support.
class TestORCFileReader < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable
  include Helper::Fixture

  # Opens the fixture through a memory-mapped stream and creates the reader.
  def setup
    omit("Require Apache Arrow ORC") unless Arrow.const_defined?(:ORCFileReader)
    fixture = fixture_path("TestOrcFile.test1.orc")
    stream = Arrow::MemoryMappedInputStream.new(fixture)
    @reader = Arrow::ORCFileReader.new(stream)
  end

  # The schema string of the fixture, one field per line.
  def test_read_type
    expected = <<-SCHEMA.chomp
boolean1: bool
byte1: int8
short1: int16
int1: int32
long1: int64
float1: float
double1: double
bytes1: binary
string1: string
middle: struct<list: list<item: struct<int1: int32, string1: string>>>
list: list<item: struct<int1: int32, string1: string>>
map: map<string, struct<int1: int32, string1: string>>
    SCHEMA
    assert_equal(expected, @reader.read_type.to_s)
  end

  # field_indices is nil until assigned and then reads back what was set.
  def test_field_indices
    require_gi_bindings(3, 2, 6)
    assert_nil(@reader.field_indices)
    @reader.field_indices = [1, 3]
    assert_equal([1, 3], @reader.field_indices)
  end

  # Fields of the struct used as list item and map value in the fixture.
  def item_fields
    [
      Arrow::Field.new("int1", Arrow::Int32DataType.new),
      Arrow::Field.new("string1", Arrow::StringDataType.new),
    ]
  end

  def item_data_type
    Arrow::StructDataType.new(item_fields)
  end

  # Builds a list array whose items are the int1/string1 structs.
  def build_items_array(items_array)
    build_list_array(item_data_type, items_array, field_name: "item")
  end

  def items_data_type
    item_field = Arrow::Field.new("item", item_data_type)
    Arrow::ListDataType.new(item_field)
  end

  def middle_fields
    [Arrow::Field.new("list", items_data_type)]
  end

  def build_middle_array(middles)
    build_struct_array(middle_fields, middles)
  end

  # Expected "middle" column: both rows hold the same two-item list.
  def middle_array
    build_middle_array([
                         {
                           "list" => [
                             {"int1" => 1, "string1" => "bye"},
                             {"int1" => 2, "string1" => "sigh"},
                           ],
                         },
                         {
                           "list" => [
                             {"int1" => 1, "string1" => "bye"},
                             {"int1" => 2, "string1" => "sigh"},
                           ],
                         },
                       ])
  end

  # Expected "list" column: a two-item row followed by a three-item row.
  def list_array
    build_items_array([
                        [
                          {"int1" => 3, "string1" => "good"},
                          {"int1" => 4, "string1" => "bad"},
                        ],
                        [
                          {"int1" => 100000000, "string1" => "cat"},
                          {"int1" => -100000, "string1" => "in"},
                          {"int1" => 1234, "string1" => "hat"},
                        ],
                      ])
  end

  # Expected "map" column: an empty map followed by a two-entry map.
  def map_array
    rows = [
      {},
      {
        "chani" => {"int1" => 5, "string1" => "chani"},
        "mauddib" => {"int1" => 1, "string1" => "mauddib"},
      },
    ]
    build_map_array(Arrow::StringDataType.new, item_data_type, rows)
  end

  # Expected contents of every column in the fixture, keyed by column name.
  def all_columns
    {
      "boolean1" => build_boolean_array([false, true]),
      "byte1" => build_int8_array([1, 100]),
      "short1" => build_int16_array([1024, 2048]),
      "int1" => build_int32_array([65536, 65536]),
      "long1" => build_int64_array([
                                     9223372036854775807,
                                     9223372036854775807,
                                   ]),
      "float1" => build_float_array([1.0, 2.0]),
      "double1" => build_double_array([-15.0, -5.0]),
      "bytes1" => build_binary_array(["\x00\x01\x02\x03\x04", ""]),
      "string1" => build_string_array(["hi", "bye"]),
      "middle" => middle_array,
      "list" => list_array,
      "map" => map_array,
    }
  end

  sub_test_case("#read_stripes") do
    test("all") do
      expected = build_table(all_columns)
      assert_equal(expected, @reader.read_stripes)
    end

    # With field_indices set to [1, 3] only boolean1 and short1 come back.
    test("select fields") do
      require_gi_bindings(3, 2, 6)
      @reader.field_indices = [1, 3]
      selected = {
        "boolean1" => build_boolean_array([false, true]),
        "short1" => build_int16_array([1024, 2048]),
      }
      assert_equal(build_table(selected), @reader.read_stripes)
    end
  end

  sub_test_case("#read_stripe") do
    test("all") do
      expected = build_record_batch(all_columns)
      assert_equal(expected, @reader.read_stripe(0))
    end

    # With field_indices set to [1, 3] only boolean1 and short1 come back.
    test("select fields") do
      require_gi_bindings(3, 2, 6)
      @reader.field_indices = [1, 3]
      selected = {
        "boolean1" => build_boolean_array([false, true]),
        "short1" => build_int16_array([1024, 2048]),
      }
      assert_equal(build_record_batch(selected), @reader.read_stripe(0))
    end
  end

  def test_n_stripes
    assert_equal(1, @reader.n_stripes)
  end

  def test_n_rows
    assert_equal(2, @reader.n_rows)
  end
end
# diff --git a/src/arrow/c_glib/test/test-read-options.rb b/src/arrow/c_glib/test/test-read-options.rb
# new file mode 100644
# index 000000000..5b2a69cd3
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-read-options.rb
# @@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the defaults and accessors of Arrow::ReadOptions.
class TestReadOptions < Test::Unit::TestCase
  def setup
    @options = Arrow::ReadOptions.new
  end

  sub_test_case("max-recursion-depth") do
    def test_default
      assert_equal(64, @options.max_recursion_depth)
    end

    def test_accessor
      @options.max_recursion_depth = 29
      assert_equal(29, @options.max_recursion_depth)
    end
  end

  sub_test_case("use-threads") do
    def test_default
      assert { @options.use_threads? }
    end

    def test_accessor
      @options.use_threads = false
      assert { not @options.use_threads? }
    end
  end

  sub_test_case("#included_fields") do
    def test_default
      assert_equal([], @options.included_fields)
    end

    # Assigning an empty list after a non-empty one reads back as empty.
    def test_accessor
      @options.included_fields = [1, 2, 3]
      assert_equal([1, 2, 3], @options.included_fields)
      @options.included_fields = []
      assert_equal([], @options.included_fields)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-record-batch-builder.rb b/src/arrow/c_glib/test/test-record-batch-builder.rb
# new file mode 100644
# index 000000000..ce8efdffd
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-record-batch-builder.rb
# @@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::RecordBatchBuilder over a two-column schema
# ("visible": boolean, "point": int32).
class TestRecordBatchBuilder < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  def setup
    @fields = [
      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
      Arrow::Field.new("point", Arrow::Int32DataType.new),
    ]
    @schema = Arrow::Schema.new(@fields)
    @builder = Arrow::RecordBatchBuilder.new(@schema)
  end

  def test_initial_capacity
    @builder.initial_capacity = 128
    assert_equal(128, @builder.initial_capacity)
  end

  def test_schema
    assert_equal(@schema, @builder.schema)
  end

  def test_n_columns
    assert_equal(@fields.size, @builder.n_columns)
  end

  sub_test_case("#get_column_builder") do
    def test_valid
      column_builder = @builder.get_column_builder(0)
      assert_equal(Arrow::BooleanArrayBuilder, column_builder.class)
    end

    # Index -1 resolves to the last column, the int32 one.
    def test_negative
      column_builder = @builder.get_column_builder(-1)
      assert_equal(Arrow::Int32ArrayBuilder, column_builder.class)
    end

    def test_too_negative
      index = -@fields.size - 1
      assert_nil(@builder.get_column_builder(index))
    end

    def test_too_large
      index = @fields.size
      assert_nil(@builder.get_column_builder(index))
    end
  end

  # Flushes twice: the second batch must contain only the values appended
  # after the first flush.
  def test_flush
    require_gi_bindings(3, 3, 1)
    first_batch = {
      "visible" => build_boolean_array([true, false, true]),
      "point" => build_int32_array([1, -1, 0]),
    }
    first_batch.each_value.with_index do |array, i|
      @builder.get_column_builder(i).append_values(array.values, [])
    end
    assert_equal(build_record_batch(first_batch),
                 @builder.flush)

    second_batch = {
      "visible" => build_boolean_array([false, true]),
      "point" => build_int32_array([10, -10]),
    }
    second_batch.each_value.with_index do |array, i|
      @builder.get_column_builder(i).append_values(array.values, [])
    end
    assert_equal(build_record_batch(second_batch),
                 @builder.flush)
  end
end
# diff --git a/src/arrow/c_glib/test/test-record-batch-datum.rb b/src/arrow/c_glib/test/test-record-batch-datum.rb
# new file mode 100644
# index 000000000..33eb793ba
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-record-batch-datum.rb
# @@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation
# (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::RecordBatchDatum wrapping a one-column record batch.
class TestRecordBatchDatum < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @array = build_boolean_array([true, false])
    @record_batch = build_record_batch("visible" => @array)
    @datum = Arrow::RecordBatchDatum.new(@record_batch)
  end

  def test_array?
    assert { not @datum.array? }
  end

  def test_array_like?
    assert { not @datum.array_like? }
  end

  sub_test_case("==") do
    # Two datums built from the same record batch compare equal.
    def test_true
      left = Arrow::RecordBatchDatum.new(@record_batch)
      right = Arrow::RecordBatchDatum.new(@record_batch)
      assert_equal(left, right)
    end

    # A record batch datum differs from an array datum.
    def test_false
      array_datum = Arrow::ArrayDatum.new(@array)
      assert_not_equal(@datum, array_datum)
    end
  end

  def test_to_string
    assert_equal("RecordBatch", @datum.to_s)
  end

  def test_value
    assert_equal(@record_batch, @datum.value)
  end
end
# diff --git a/src/arrow/c_glib/test/test-record-batch-iterator.rb b/src/arrow/c_glib/test/test-record-batch-iterator.rb
# new file mode 100644
# index 000000000..daedde759
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-record-batch-iterator.rb
# @@ -0,0 +1,51 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::RecordBatchIterator over two record batches that share a
# ("visible": boolean, "point": int32) schema.
class TestRecordBatchIterator < Test::Unit::TestCase
  include Helper::Buildable

  # Builds a 3-row and a 4-row record batch and the iterator over them.
  def setup
    fields = [
      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
      Arrow::Field.new("point", Arrow::Int32DataType.new),
    ]
    schema = Arrow::Schema.new(fields)
    column_sets = [
      [
        build_boolean_array([true, false, true]),
        build_int32_array([1, 2, 3]),
      ],
      [
        build_boolean_array([false, true, false, true]),
        build_int32_array([-1, -2, -3, -4]),
      ],
    ]
    @record_batches = column_sets.map do |columns|
      # The row count is taken from the first column.
      Arrow::RecordBatch.new(schema, columns[0].length, columns)
    end
    @iterator = Arrow::RecordBatchIterator.new(@record_batches)
  end

  # Batches come back in order; an exhausted iterator returns nil.
  def test_next
    assert_equal(@record_batches[0], @iterator.next)
    assert_equal(@record_batches[1], @iterator.next)
    assert_nil(@iterator.next)
  end

  def test_to_list
    assert_equal(@record_batches, @iterator.to_list)
  end
end
# diff --git a/src/arrow/c_glib/test/test-record-batch-reader.rb b/src/arrow/c_glib/test/test-record-batch-reader.rb
# new file mode 100644
# index 000000000..489482141
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-record-batch-reader.rb
# @@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::RecordBatchReader created from two in-memory record
# batches and their schema.
class TestRecordBatchReader < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  # Builds a 3-row and a 4-row record batch and the reader over them.
  def setup
    fields = [
      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
      Arrow::Field.new("point", Arrow::Int32DataType.new),
    ]
    @schema = Arrow::Schema.new(fields)
    column_sets = [
      [
        build_boolean_array([true, false, true]),
        build_int32_array([1, 2, 3]),
      ],
      [
        build_boolean_array([false, true, false, true]),
        build_int32_array([-1, -2, -3, -4]),
      ],
    ]
    @record_batches = column_sets.map do |columns|
      Arrow::RecordBatch.new(@schema, columns[0].length, columns)
    end
    @reader = Arrow::RecordBatchReader.new(@record_batches, @schema)
  end

  # Exporting and re-importing the reader yields a table equal to one built
  # directly from the batches.
  def test_export
    require_gi_bindings(3, 4, 8)
    c_abi_array_stream = @reader.export
    expected = Arrow::Table.new(@schema, @record_batches)
    imported = Arrow::RecordBatchReader.import(c_abi_array_stream)
    assert_equal(expected, imported.read_all)
  end

  def test_schema
    assert_equal(@schema, @reader.schema)
  end

  # Batches come back in order; an exhausted reader returns nil.
  def test_read_next
    assert_equal(@record_batches[0], @reader.read_next)
    assert_equal(@record_batches[1], @reader.read_next)
    assert_nil(@reader.read_next)
  end

  def test_read_all
    expected = Arrow::Table.new(@schema, @record_batches)
    assert_equal(expected, @reader.read_all)
  end
end
# diff --git a/src/arrow/c_glib/test/test-record-batch.rb
# b/src/arrow/c_glib/test/test-record-batch.rb
# new file mode 100644
# index 000000000..bbdbf82d0
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-record-batch.rb
# @@ -0,0 +1,193 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::RecordBatch: construction, comparison, column access,
# slicing, pretty printing, column editing and serialization.
class TestRecordBatch < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case(".new") do
    def test_valid
      schema = Arrow::Schema.new([
                                   Arrow::Field.new("visible", Arrow::BooleanDataType.new),
                                   Arrow::Field.new("valid", Arrow::BooleanDataType.new),
                                 ])
      columns = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      record_batch = Arrow::RecordBatch.new(schema, 1, columns)
      assert_equal(1, record_batch.n_rows)
    end

    # A one-field schema with an empty column list must be rejected.
    def test_no_columns
      schema = Arrow::Schema.new([
                                   Arrow::Field.new("visible", Arrow::BooleanDataType.new),
                                 ])
      message = "[record-batch][new]: " +
        "Invalid: Number of columns did not match schema"
      expected_error = Arrow::Error::Invalid.new(message)
      assert_raise(expected_error) do
        Arrow::RecordBatch.new(schema, 0, [])
      end
    end
  end

  sub_test_case("instance methods") do
    # Builds a 5-row batch with two boolean columns, "visible" and "valid".
    def setup
      @visible_field = Arrow::Field.new("visible", Arrow::BooleanDataType.new)
      @visible_values = [true, false, true, false, true]
      @valid_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
      @valid_values = [false, true, false, true, false]

      schema = Arrow::Schema.new([@visible_field, @valid_field])
      columns = [
        build_boolean_array(@visible_values),
        build_boolean_array(@valid_values),
      ]
      @record_batch = Arrow::RecordBatch.new(schema,
                                             @visible_values.size,
                                             columns)
    end

    # Exporting and re-importing the batch yields an equal batch.
    def test_export
      require_gi_bindings(3, 4, 8)
      success, c_abi_array, c_abi_schema = @record_batch.export
      schema = Arrow::Schema.import(c_abi_schema)
      imported = Arrow::RecordBatch.import(c_abi_array, schema)
      assert_equal([success, @record_batch],
                   [true, imported])
    end

    sub_test_case("#equal") do
      def setup
        require_gi_bindings(3, 4, 2)

        @fields = [
          Arrow::Field.new("visible", Arrow::BooleanDataType.new),
          Arrow::Field.new("valid", Arrow::BooleanDataType.new),
        ]
        @schema = Arrow::Schema.new(@fields)
        @columns = [
          build_boolean_array([true, false, true, false, true]),
          build_boolean_array([false, true, false, true, false]),
        ]
        @record_batch = Arrow::RecordBatch.new(@schema, 5, @columns)
      end

      def test_equal
        twin = Arrow::RecordBatch.new(@schema, 5, @columns)
        assert_equal(@record_batch, twin)
      end

      # Batches differing only in schema metadata compare equal when the
      # second argument is false and unequal when it is true.
      def test_equal_metadata
        schema_with_meta = @schema.with_metadata("key" => "value")
        other = Arrow::RecordBatch.new(schema_with_meta, 5, @columns)

        assert @record_batch.equal_metadata(other, false)
        assert { not @record_batch.equal_metadata(other, true) }
      end
    end

    def test_schema
      names = @record_batch.schema.fields.collect(&:name)
      assert_equal(["visible", "valid"], names)
    end

    sub_test_case("#column_data") do
      def test_positive
        expected = build_boolean_array(@valid_values)
        assert_equal(expected, @record_batch.get_column_data(1))
      end

      # Index -2 resolves to the first of the two columns.
      def test_negative
        expected = build_boolean_array(@visible_values)
        assert_equal(expected, @record_batch.get_column_data(-2))
      end

      def test_positive_out_of_index
        assert_nil(@record_batch.get_column_data(2))
      end

      def test_negative_out_of_index
        assert_nil(@record_batch.get_column_data(-3))
      end
    end

    def test_n_columns
      assert_equal(2, @record_batch.n_columns)
    end

    def test_n_rows
      assert_equal(5, @record_batch.n_rows)
    end

    # Slicing (offset 3, length 2) keeps the last two "visible" values.
    def test_slice
      sliced = @record_batch.slice(3, 2)
      visible = Array.new(sliced.n_rows) do |row|
        sliced.get_column_data(0).get_value(row)
      end
      assert_equal([false, true], visible)
    end

    # NOTE(review): the reviewed text had its whitespace collapsed, so the
    # column layout of this expected output could not be read from it. The
    # spacing below is restored from Arrow's pretty-print layout as
    # recalled - confirm against the upstream file.
    def test_to_s
      assert_equal(<<-PRETTY_PRINT, @record_batch.to_s)
visible:   [
    true,
    false,
    true,
    false,
    true
  ]
valid:   [
    false,
    true,
    false,
    true,
    false
  ]
      PRETTY_PRINT
    end

    # Inserting at index 1 places the new column between the existing two.
    def test_add_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      column = build_boolean_array([false, false, true, true, true])
      extended = @record_batch.add_column(1, field, column)
      assert_equal(["visible", "added", "valid"],
                   extended.schema.fields.collect(&:name))
    end

    def test_remove_column
      reduced = @record_batch.remove_column(0)
      assert_equal(["valid"],
                   reduced.schema.fields.collect(&:name))
    end

    # A serialized batch read back with the same schema equals the original.
    def test_serialize
      buffer = @record_batch.serialize
      input_stream = Arrow::BufferInputStream.new(buffer)
      restored = input_stream.read_record_batch(@record_batch.schema)
      assert_equal(@record_batch, restored)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-resizable-buffer.rb b/src/arrow/c_glib/test/test-resizable-buffer.rb
# new file mode 100644
# index 000000000..84d95dec5
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-resizable-buffer.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ResizableBuffer starting from an initial size of 0.
class TestResizableBuffer < Test::Unit::TestCase
  def setup
    @buffer = Arrow::ResizableBuffer.new(0)
  end

  def test_resize
    @buffer.resize(1)
    size = @buffer.size
    assert_equal(1, size)
  end

  # Reserving 1 byte results in a reported capacity of 64.
  def test_reserve
    @buffer.reserve(1)
    capacity = @buffer.capacity
    assert_equal(64, capacity)
  end
end
# diff --git a/src/arrow/c_glib/test/test-scalar-aggregate-options.rb b/src/arrow/c_glib/test/test-scalar-aggregate-options.rb
# new file mode 100644
# index 000000000..a794b5324
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-scalar-aggregate-options.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests the defaults and accessors of Arrow::ScalarAggregateOptions.
class TestScalarAggregateOptions < Test::Unit::TestCase
  def setup
    @options = Arrow::ScalarAggregateOptions.new
  end

  sub_test_case("skip_nulls") do
    def test_default
      assert { @options.skip_nulls? }
    end

    def test_accessor
      @options.skip_nulls = false
      assert { not @options.skip_nulls? }
    end
  end

  sub_test_case("min_count") do
    def test_default
      assert_equal(1, @options.min_count)
    end

    def test_accessor
      @options.min_count = 0
      assert_equal(0, @options.min_count)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-scalar-datum.rb b/src/arrow/c_glib/test/test-scalar-datum.rb
# new file mode 100644
# index 000000000..17e5d6b06
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-scalar-datum.rb
# @@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::ScalarDatum wrapping a boolean scalar.
class TestScalarDatum < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @scalar = Arrow::BooleanScalar.new(true)
    @datum = Arrow::ScalarDatum.new(@scalar)
  end

  def test_array?
    assert { not @datum.array? }
  end

  def test_array_like?
    assert { not @datum.array_like? }
  end

  def test_scalar?
    assert { @datum.scalar? }
  end

  def test_value?
    assert { @datum.value? }
  end

  sub_test_case("==") do
    # Two datums built from the same scalar compare equal.
    def test_true
      left = Arrow::ScalarDatum.new(@scalar)
      right = Arrow::ScalarDatum.new(@scalar)
      assert_equal(left, right)
    end

    # A scalar datum differs from an array datum.
    def test_false
      array_datum = Arrow::ArrayDatum.new(build_boolean_array([true, false]))
      assert_not_equal(@datum, array_datum)
    end
  end

  def test_to_string
    assert_equal("Scalar", @datum.to_s)
  end

  def test_value
    assert_equal(@scalar, @datum.value)
  end
end
# diff --git a/src/arrow/c_glib/test/test-schema.rb b/src/arrow/c_glib/test/test-schema.rb
# new file mode 100644
# index 000000000..e90dd897a
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-schema.rb
# @@ -0,0 +1,214 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TestSchema < Test::Unit::TestCase + include Helper::Omittable + + def test_export + require_gi_bindings(3, 4, 8) + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + c_abi_schema = schema.export + assert_equal(schema, + Arrow::Schema.import(c_abi_schema)) + end + + def test_equal + fields1 = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + ] + fields2 = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + ] + assert_equal(Arrow::Schema.new(fields1), + Arrow::Schema.new(fields2)) + end + + def test_field + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal("enabled", schema.get_field(0).name) + end + + sub_test_case("#get_field_by_name") do + def test_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal("enabled", schema.get_field_by_name("enabled").name) + end + + def test_not_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_nil(schema.get_field_by_name("nonexistent")) + end + end + + sub_test_case("#get_field_index") do + def test_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal(0, schema.get_field_index("enabled")) + end + + def test_not_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal(-1, schema.get_field_index("nonexistent")) + end + end + + def test_n_fields + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + assert_equal(2, schema.n_fields) + end + + def test_fields + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", 
Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + assert_equal(["enabled", "required"], + schema.fields.collect(&:name)) + end + + def test_to_s + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + assert_equal(<<-SCHEMA.chomp, schema.to_s) +enabled: bool +required: bool + SCHEMA + end + + sub_test_case("#to_string_metadata") do + def setup + require_gi_bindings(3, 4, 2) + + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + @schema = schema.with_metadata("key" => "value") + end + + def test_true + assert_equal(<<-SCHEMA.chomp, @schema.to_string_metadata(true)) +enabled: bool +required: bool +-- metadata -- +key: value + SCHEMA + end + + def test_false + assert_equal(<<-SCHEMA.chomp, @schema.to_string_metadata(false)) +enabled: bool +required: bool + SCHEMA + end + end + + def test_add_field + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new) + ] + schema = Arrow::Schema.new(fields) + new_field = Arrow::Field.new("new", Arrow::BooleanDataType.new) + new_schema = schema.add_field(1, new_field) + assert_equal(<<-SCHEMA.chomp, new_schema.to_s) +enabled: bool +new: bool +required: bool + SCHEMA + end + + def test_remove_field + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new) + ] + schema = Arrow::Schema.new(fields) + new_schema = schema.remove_field(0) + assert_equal(<<-SCHEMA.chomp, new_schema.to_s) +required: bool + SCHEMA + end + + def test_replace_field + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new) + ] + schema = Arrow::Schema.new(fields) + new_field = 
Arrow::Field.new("new", Arrow::BooleanDataType.new) + new_schema = schema.replace_field(1, new_field) + assert_equal(<<-SCHEMA.chomp, new_schema.to_s) +enabled: bool +new: bool + SCHEMA + end + + def test_has_metadata + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new), + ] + schema = Arrow::Schema.new(fields) + assert do + not schema.has_metadata? + end + schema_with_metadata = schema.with_metadata("key" => "value") + assert do + schema_with_metadata.has_metadata? + end + end + + sub_test_case("#metadata") do + def setup + require_gi_bindings(3, 4, 2) + + fields = [ + Arrow::Field.new("enabled", Arrow::BooleanDataType.new), + Arrow::Field.new("required", Arrow::BooleanDataType.new), + ] + @schema = Arrow::Schema.new(fields) + end + + def test_existent + schema_with_metadata = @schema.with_metadata("key" => "value") + assert_equal({"key" => "value"}, + schema_with_metadata.metadata) + end + + def test_nonexistent + assert_nil(@schema.metadata) + end + end +end diff --git a/src/arrow/c_glib/test/test-set-lookup-options.rb b/src/arrow/c_glib/test/test-set-lookup-options.rb new file mode 100644 index 000000000..779bacef6 --- /dev/null +++ b/src/arrow/c_glib/test/test-set-lookup-options.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSetLookupOptions < Test::Unit::TestCase + include Helper::Buildable + + def test_new + value_set = Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])) + options = Arrow::SetLookupOptions.new(value_set) + assert_equal(value_set, options.value_set) + end + + sub_test_case("instance methods") do + def setup + value_set = Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])) + @options = Arrow::SetLookupOptions.new(value_set) + end + + def test_skip_nulls + assert do + not @options.skip_nulls? + end + @options.skip_nulls = true + assert do + @options.skip_nulls? + end + end + end +end diff --git a/src/arrow/c_glib/test/test-slow-file-system.rb b/src/arrow/c_glib/test/test-slow-file-system.rb new file mode 100644 index 000000000..0e36dca48 --- /dev/null +++ b/src/arrow/c_glib/test/test-slow-file-system.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "file-system-tests" + +class TestSlowFileSystem < Test::Unit::TestCase + def setup + Dir.mktmpdir do |tmpdir| + options = Arrow::LocalFileSystemOptions.new + local_fs = Arrow::LocalFileSystem.new(options) + subtree_fs = Arrow::SubTreeFileSystem.new(tmpdir, local_fs) + @fs = Arrow::SlowFileSystem.new(subtree_fs, 0.001) + yield + end + end + + include FileSystemTests + + def test_type_name + assert_equal([ + "slow", + "subtree", + ], + [ + @fs.type_name, + @fs.base_file_system.type_name, + ]) + end +end diff --git a/src/arrow/c_glib/test/test-sort-indices.rb b/src/arrow/c_glib/test/test-sort-indices.rb new file mode 100644 index 000000000..a8c4f40c5 --- /dev/null +++ b/src/arrow/c_glib/test/test-sort-indices.rb @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestSortIndices < Test::Unit::TestCase + include Helper::Buildable + + def test_array + array = build_int16_array([nil, 1, 0, nil, 4, 3]) + assert_equal(build_uint64_array([2, 1, 5, 4, 0, 3]), + array.sort_indices(:ascending)) + end + + def test_chunked_array + arrays = [ + build_int16_array([1]), + build_int16_array([0, 4, -3]), + ] + chunked_array = Arrow::ChunkedArray.new(arrays) + assert_equal(build_uint64_array([3, 1, 0, 2]), + chunked_array.sort_indices(:ascending)) + end + + def test_record_batch + columns = { + column1: build_int16_array([ 1, 0, 4, 4, -3, 1]), + column2: build_string_array(["a", "a", "b", "c", "d", "a"]), + } + record_batch = build_record_batch(columns) + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + options = Arrow::SortOptions.new(sort_keys) + assert_equal(build_uint64_array([4, 1, 0, 5, 3, 2]), + record_batch.sort_indices(options)) + end + + def test_table + raw_array1 = [ 1, 0, 4, 4, -3, 1] + raw_array2 = ["a", "a", "b", "c", "d", "a"] + columns = { + column1: Arrow::ChunkedArray.new([build_int16_array(raw_array1[0...1]), + build_int16_array(raw_array1[1...3]), + build_int16_array(raw_array1[3..-1])]), + column2: Arrow::ChunkedArray.new([build_string_array(raw_array2[0...2]), + build_string_array(raw_array2[2..-1])]), + } + table = build_table(columns) + options = Arrow::SortOptions.new + options.add_sort_key(Arrow::SortKey.new("column1", :ascending)) + options.add_sort_key(Arrow::SortKey.new("column2", :descending)) + assert_equal(build_uint64_array([4, 1, 0, 5, 3, 2]), + table.sort_indices(options)) + end +end diff --git a/src/arrow/c_glib/test/test-sort-options.rb b/src/arrow/c_glib/test/test-sort-options.rb new file mode 100644 index 000000000..e57645b1c --- /dev/null +++ b/src/arrow/c_glib/test/test-sort-options.rb @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSortOptions < Test::Unit::TestCase + include Helper::Buildable + + def test_new + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + options = Arrow::SortOptions.new(sort_keys) + assert_equal(sort_keys, options.sort_keys) + end + + def test_add_sort_key + options = Arrow::SortOptions.new + options.add_sort_key(Arrow::SortKey.new("column1", :ascending)) + options.add_sort_key(Arrow::SortKey.new("column2", :descending)) + assert_equal([ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ], + options.sort_keys) + end + + def test_set_sort_keys + options = Arrow::SortOptions.new([Arrow::SortKey.new("column3", :ascending)]) + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + options.sort_keys = sort_keys + assert_equal(sort_keys, options.sort_keys) + end + + def test_equal + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + assert_equal(Arrow::SortOptions.new(sort_keys), + Arrow::SortOptions.new(sort_keys)) + end +end diff --git a/src/arrow/c_glib/test/test-source-node.rb b/src/arrow/c_glib/test/test-source-node.rb new file mode 100644 index 
000000000..e04c9b72b --- /dev/null +++ b/src/arrow/c_glib/test/test-source-node.rb @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSourceNode < Test::Unit::TestCase + include Helper::Buildable + + def execute_plan(options) + plan = Arrow::ExecutePlan.new + source_node = plan.build_source_node(options) + sink_node_options = Arrow::SinkNodeOptions.new + sink_node = plan.build_sink_node(source_node, + sink_node_options) + plan.validate + plan.start + plan.wait + reader = sink_node_options.get_reader(source_node.output_schema) + table = reader.read_all + plan.stop + table + end + + def test_record_batch_reader + numbers = build_int8_array([1, 2, 3, 4, 5]) + strings = build_string_array(["a", "b", "a", "b", "a"]) + record_batch = build_record_batch(number: numbers, + string: strings) + reader = Arrow::RecordBatchReader.new([record_batch]) + options = Arrow::SourceNodeOptions.new(reader) + assert_equal(build_table(number: numbers, + string: strings), + execute_plan(options)) + end + + def test_record_batch + numbers = build_int8_array([1, 2, 3, 4, 5]) + strings = build_string_array(["a", "b", "a", "b", "a"]) + record_batch = build_record_batch(number: numbers, + string: strings) + options = 
Arrow::SourceNodeOptions.new(record_batch) + assert_equal(build_table(number: numbers, + string: strings), + execute_plan(options)) + end + + def test_table + numbers = build_int8_array([1, 2, 3, 4, 5]) + strings = build_string_array(["a", "b", "a", "b", "a"]) + table = build_table(number: numbers, + string: strings) + options = Arrow::SourceNodeOptions.new(table) + assert_equal(table, execute_plan(options)) + end +end diff --git a/src/arrow/c_glib/test/test-sparse-union-array.rb b/src/arrow/c_glib/test/test-sparse-union-array.rb new file mode 100644 index 000000000..1132ccb45 --- /dev/null +++ b/src/arrow/c_glib/test/test-sparse-union-array.rb @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestSparseUnionArray < Test::Unit::TestCase + include Helper::Buildable + + sub_test_case(".new") do + sub_test_case("default") do + def setup + type_ids = build_int8_array([0, 1, 0, 1, 0]) + fields = [ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ] + @array = Arrow::SparseUnionArray.new(type_ids, fields) + end + + def test_value_data_type + fields = [ + Arrow::Field.new("0", Arrow::Int16DataType.new), + Arrow::Field.new("1", Arrow::StringDataType.new), + ] + assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end + + sub_test_case("DataType") do + def setup + data_type_fields = [ + Arrow::Field.new("number", Arrow::Int16DataType.new), + Arrow::Field.new("text", Arrow::StringDataType.new), + ] + type_codes = [11, 13] + @data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes) + type_ids = build_int8_array([11, 13, 11, 13, 11]) + fields = [ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ] + @array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields) + end + + def test_value_data_type + assert_equal(@data_type, + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end + end +end diff --git a/src/arrow/c_glib/test/test-sparse-union-data-type.rb b/src/arrow/c_glib/test/test-sparse-union-data-type.rb new file mode 100644 index 000000000..bf65ae0a4 --- /dev/null +++ b/src/arrow/c_glib/test/test-sparse-union-data-type.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSparseUnionDataType < Test::Unit::TestCase + def setup + @number_field_data_type = Arrow::Int32DataType.new + @text_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @number_field_data_type, + @text_field_data_type, + ] + @number_field = Arrow::Field.new("number", @number_field_data_type) + @text_field = Arrow::Field.new("text", @text_field_data_type) + @fields = [ + @number_field, + @text_field, + ] + @data_type = Arrow::SparseUnionDataType.new(@fields, [2, 9]) + end + + def test_type + assert_equal(Arrow::Type::SPARSE_UNION, @data_type.id) + end + + def test_name + assert_equal("sparse_union", @data_type.name) + end + + def test_to_s + assert_equal("sparse_union<number: int32=2, text: string=9>", + @data_type.to_s) + end + + def test_fields + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) + end + + def test_get_field + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end +end diff --git a/src/arrow/c_glib/test/test-sparse-union-scalar.rb b/src/arrow/c_glib/test/test-sparse-union-scalar.rb new file mode 100644 index 000000000..a7f1b0695 --- /dev/null +++ 
b/src/arrow/c_glib/test/test-sparse-union-scalar.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSparseUnionScalar < Test::Unit::TestCase + def setup + fields = [ + Arrow::Field.new("number", Arrow::Int8DataType.new), + Arrow::Field.new("text", Arrow::StringDataType.new), + ] + @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9]) + @type_code = 2 + @value = Arrow::Int8Scalar.new(-29) + @scalar = Arrow::SparseUnionScalar.new(@data_type, @type_code, @value) + end + + def test_type_code + assert_equal(@type_code, + @scalar.type_code) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::SparseUnionScalar.new(@data_type, @type_code, @value), + @scalar) + end + + def test_to_s + assert_equal("union{number: int8 = -29}", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-stream-writer.rb b/src/arrow/c_glib/test/test-stream-writer.rb new file mode 100644 index 000000000..32754e208 --- /dev/null +++ b/src/arrow/c_glib/test/test-stream-writer.rb @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestStreamWriter < Test::Unit::TestCase + include Helper::Buildable + + def test_write_record_batch + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + + tempfile = Tempfile.open("arrow-ipc-stream-writer") + output = Arrow::FileOutputStream.new(tempfile.path, false) + begin + stream_writer = Arrow::RecordBatchStreamWriter.new(output, schema) + begin + columns = [ + build_boolean_array(data), + ] + record_batch = Arrow::RecordBatch.new(schema, data.size, columns) + stream_writer.write_record_batch(record_batch) + ensure + stream_writer.close + end + ensure + output.close + end + + input = Arrow::MemoryMappedInputStream.new(tempfile.path) + begin + stream_reader = Arrow::RecordBatchStreamReader.new(input) + assert_equal([field.name], + stream_reader.schema.fields.collect(&:name)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + stream_reader.read_next) + assert_nil(stream_reader.read_next) + ensure + input.close + end + end +end diff --git a/src/arrow/c_glib/test/test-string-array.rb b/src/arrow/c_glib/test/test-string-array.rb new file mode 100644 index 000000000..09a43d197 --- /dev/null +++ b/src/arrow/c_glib/test/test-string-array.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestStringArray < Test::Unit::TestCase + include Helper::Buildable + + def test_new + value_offsets = Arrow::Buffer.new([0, 5, 11, 11].pack("l*")) + data = Arrow::Buffer.new("HelloWorld!") + assert_equal(build_string_array(["Hello", "World!", nil]), + Arrow::StringArray.new(3, + value_offsets, + data, + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_value + builder = Arrow::StringArrayBuilder.new + builder.append_string("Hello") + array = builder.finish + assert_equal("Hello", array.get_string(0)) + end + + def test_buffer + builder = Arrow::StringArrayBuilder.new + builder.append_string("Hello") + builder.append_string("World") + array = builder.finish + assert_equal("HelloWorld", array.buffer.data.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-string-data-type.rb b/src/arrow/c_glib/test/test-string-data-type.rb new file mode 100644 index 000000000..87613d289 --- /dev/null +++ b/src/arrow/c_glib/test/test-string-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestStringDataType < Test::Unit::TestCase + def test_type + data_type = Arrow::StringDataType.new + assert_equal(Arrow::Type::STRING, data_type.id) + end + + def test_name + data_type = Arrow::StringDataType.new + assert_equal("utf8", data_type.name) + end + + def test_to_s + data_type = Arrow::StringDataType.new + assert_equal("string", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-string-scalar.rb b/src/arrow/c_glib/test/test-string-scalar.rb new file mode 100644 index 000000000..3b9499ef9 --- /dev/null +++ b/src/arrow/c_glib/test/test-string-scalar.rb @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestStringScalar < Test::Unit::TestCase + def setup + @buffer = Arrow::Buffer.new("Hello") + @scalar = Arrow::StringScalar.new(@buffer) + end + + def test_data_type + assert_equal(Arrow::StringDataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? 
+ end + end + + def test_equal + assert_equal(Arrow::StringScalar.new(@buffer), + @scalar) + end + + def test_to_s + assert_equal("Hello", @scalar.to_s) + end + + def test_value + assert_equal(@buffer, + @scalar.value) + end + + def test_cast + buffer = Arrow::Buffer.new("-10") + scalar = Arrow::StringScalar.new(buffer) + assert_equal(Arrow::Int8Scalar.new(-10), + scalar.cast(Arrow::Int8DataType.new)) + end +end diff --git a/src/arrow/c_glib/test/test-struct-array.rb b/src/arrow/c_glib/test/test-struct-array.rb new file mode 100644 index 000000000..af7e299d8 --- /dev/null +++ b/src/arrow/c_glib/test/test-struct-array.rb @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Tests for Arrow::StructArray: direct construction from child arrays and
# reading the children back through get_field / flatten.
class TestStructArray < Test::Unit::TestCase
  include Helper::Buildable

  # An Arrow::StructArray assembled by hand from a data type, two child
  # arrays and a bitmap buffer must equal the array produced by the
  # build_struct_array helper for the same logical rows.
  def test_new
    fields = [
      Arrow::Field.new("score", Arrow::Int8DataType.new),
      Arrow::Field.new("enabled", Arrow::BooleanDataType.new),
    ]
    first_row = {"score" => -29, "enabled" => true}
    second_row = {"score" => 2, "enabled" => false}
    expected = build_struct_array(fields, [first_row, second_row, nil])

    data_type = Arrow::StructDataType.new(fields)
    # The same bitmap buffer is handed to both child arrays.
    nulls = Arrow::Buffer.new([0b11].pack("C*"))
    score_bytes = Arrow::Buffer.new([-29, 2].pack("C*"))
    enabled_bits = Arrow::Buffer.new([0b01].pack("C*"))
    children = [
      Arrow::Int8Array.new(2, score_bytes, nulls, 0),
      Arrow::BooleanArray.new(2, enabled_bits, nulls, 0),
    ]
    # NOTE(review): 0b011 presumably flags the first two of the three
    # structs as valid, and -1 presumably asks Arrow to compute the null
    # count itself -- confirm against the Arrow GLib documentation.
    struct_bitmap = Arrow::Buffer.new([0b011].pack("C*"))
    actual = Arrow::StructArray.new(data_type, 3, children, struct_bitmap, -1)
    assert_equal(expected, actual)
  end

  # Builds two structs with a StructArrayBuilder, then reads row 0 via
  # get_field and every later row via flatten; both routes must yield the
  # appended values.
  def test_flatten
    fields = [
      Arrow::Field.new("score", Arrow::Int8DataType.new),
      Arrow::Field.new("enabled", Arrow::BooleanDataType.new),
    ]
    data_type = Arrow::StructDataType.new(fields)
    builder = Arrow::StructArrayBuilder.new(data_type)

    # First struct: children reached through get_field_builder.
    builder.append_value
    builder.get_field_builder(0).append_value(-29)
    builder.get_field_builder(1).append_value(true)

    # Second struct: children reached through the field_builders list.
    builder.append_value
    builder.field_builders[0].append_value(2)
    builder.field_builders[1].append_value(false)

    array = builder.finish
    rows = array.length.times.map do |row|
      if row.zero?
        [
          array.get_field(0).get_value(row),
          array.get_field(1).get_value(row),
        ]
      else
        array.flatten.map { |child| child.get_value(row) }
      end
    end
    assert_equal([[-29, true], [2, false]], rows)
  end
end
# diff --git a/src/arrow/c_glib/test/test-struct-data-type.rb b/src/arrow/c_glib/test/test-struct-data-type.rb
# new file mode 100644
# index 000000000..15b43768c
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-struct-data-type.rb
# @@ -0,0 +1,115 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::StructDataType built from an "enabled" boolean field and
# a "message" string field.
class TestStructDataType < Test::Unit::TestCase
  def setup
    @enabled_field_data_type = Arrow::BooleanDataType.new
    @message_field_data_type = Arrow::StringDataType.new
    @field_data_types = [
      @enabled_field_data_type,
      @message_field_data_type,
    ]
    @enabled_field = Arrow::Field.new("enabled", @enabled_field_data_type)
    @message_field = Arrow::Field.new("message", @message_field_data_type)
    @fields = [@enabled_field, @message_field]
    @data_type = Arrow::StructDataType.new(@fields)
  end

  def test_type
    assert_equal(Arrow::Type::STRUCT, @data_type.id)
  end

  def test_name
    assert_equal("struct", @data_type.name)
  end

  def test_to_s
    assert_equal("struct<enabled: bool, message: string>",
                 @data_type.to_s)
  end

  def test_n_fields
    assert_equal(2, @data_type.n_fields)
  end

  # Each returned field must match the original field and carry the
  # original data type.
  def test_fields
    expected = @fields.zip(@field_data_types)
    actual = @data_type.fields.map { |field| [field, field.data_type] }
    assert_equal(expected, actual)
  end

  sub_test_case("#get_field") do
    def test_found
      assert_equal(@fields[1], @data_type.get_field(1))
    end

    # A negative index is expected to resolve like Array#[] does.
    def test_negative
      assert_equal(@fields[-1], @data_type.get_field(-1))
    end

    # An index past the last field yields nil.
    def test_over
      assert_equal(nil, @data_type.get_field(2))
    end

    def test_data_type
      field = @data_type.get_field(0)
      expected = [@fields[0], @field_data_types[0]]
      assert_equal(expected, [field, field.data_type])
    end
  end

  sub_test_case("#get_field_by_name") do
    def test_found
      assert_equal(@enabled_field,
                   @data_type.get_field_by_name("enabled"))
    end

    def test_not_found
      assert_equal(nil,
                   @data_type.get_field_by_name("nonexistent"))
    end

    def test_data_type
      field = @data_type.get_field_by_name("enabled")
      expected = [@enabled_field, @enabled_field_data_type]
      assert_equal(expected, [field, field.data_type])
    end
  end

  sub_test_case("#get_field_index") do
    def test_found
      assert_equal(@fields.index(@enabled_field),
                   @data_type.get_field_index("enabled"))
    end

    # An unknown name is reported as -1.
    def test_not_found
      assert_equal(-1,
                   @data_type.get_field_index("nonexistent"))
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-struct-scalar.rb b/src/arrow/c_glib/test/test-struct-scalar.rb
# new file mode 100644
# index 000000000..9774943ba
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-struct-scalar.rb
# @@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::StructScalar holding an int8 "score" and a boolean
# "enabled" value.
class TestStructScalar < Test::Unit::TestCase
  def setup
    score_field = Arrow::Field.new("score", Arrow::Int8DataType.new)
    enabled_field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
    @data_type = Arrow::StructDataType.new([score_field, enabled_field])
    @value = [
      Arrow::Int8Scalar.new(-29),
      Arrow::BooleanScalar.new(true),
    ]
    @scalar = Arrow::StructScalar.new(@data_type, @value)
  end

  def test_data_type
    assert_equal(@data_type, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar built from the same data type and values compares equal.
  def test_equal
    same_scalar = Arrow::StructScalar.new(@data_type, @value)
    assert_equal(same_scalar, @scalar)
  end

  def test_to_s
    assert_equal("{score:int8 = -29, enabled:bool = true}", @scalar.to_s)
  end

  def test_value
    assert_equal(@value, @scalar.value)
  end
end
# diff --git a/src/arrow/c_glib/test/test-table-batch-reader.rb b/src/arrow/c_glib/test/test-table-batch-reader.rb
# new file mode 100644
# index 000000000..b161c8a55
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-table-batch-reader.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::TableBatchReader reading record batches out of a table.
class TestTableBatchReader < Test::Unit::TestCase
  include Helper::Buildable

  # A table with an empty column yields no batch at all.
  def test_empty
    empty_column = build_boolean_array([])
    reader = Arrow::TableBatchReader.new(build_table("visible" => empty_column))
    assert_nil(reader.read_next)
  end

  # A one-row table yields exactly one batch, then nil.
  def test_have_record
    array = build_boolean_array([true])
    reader = Arrow::TableBatchReader.new(build_table("visible" => array))
    expected_batch = build_record_batch("visible" => array)
    assert_equal(expected_batch, reader.read_next)
    assert_nil(reader.read_next)
  end

  def test_schema
    table = build_table("visible" => build_boolean_array([]))
    reader = Arrow::TableBatchReader.new(table)
    assert_equal(table.schema, reader.schema)
  end
end
# diff --git a/src/arrow/c_glib/test/test-table-concatenate-options.rb b/src/arrow/c_glib/test/test-table-concatenate-options.rb
# new file mode 100644
# index 000000000..055d6e270
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-table-concatenate-options.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests the two boolean properties of Arrow::TableConcatenateOptions: each
# test checks the value on a fresh instance, flips it, and checks again.
class TestTableConcatenateOptions < Test::Unit::TestCase
  def setup
    @options = Arrow::TableConcatenateOptions.new
  end

  # unify_schemas starts out false and can be switched on.
  def test_unify_schemas
    assert { not @options.unify_schemas? }
    @options.unify_schemas = true
    assert { @options.unify_schemas? }
  end

  # promote_nullability starts out true and can be switched off.
  def test_promote_nullability
    assert { @options.promote_nullability? }
    @options.promote_nullability = false
    assert { not @options.promote_nullability? }
  end
end
# diff --git a/src/arrow/c_glib/test/test-table-datum.rb b/src/arrow/c_glib/test/test-table-datum.rb
# new file mode 100644
# index 000000000..7ff3997e8
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-table-datum.rb
# @@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::TableDatum wrapping a one-column boolean table.
class TestTableDatum < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @array = build_boolean_array([true, false])
    @table = build_table("visible" => @array)
    @datum = Arrow::TableDatum.new(@table)
  end

  def test_array?
    assert { not @datum.array? }
  end

  def test_array_like?
    assert { not @datum.array_like? }
  end

  sub_test_case("==") do
    # Two datums wrapping the same table are equal.
    def test_true
      left = Arrow::TableDatum.new(@table)
      right = Arrow::TableDatum.new(@table)
      assert_equal(left, right)
    end

    # A table datum never equals an array datum.
    def test_false
      array_datum = Arrow::ArrayDatum.new(@array)
      assert_not_equal(@datum, array_datum)
    end
  end

  def test_to_string
    assert_equal("Table", @datum.to_s)
  end

  def test_value
    assert_equal(@table, @datum.value)
  end
end
# diff --git a/src/arrow/c_glib/test/test-table.rb b/src/arrow/c_glib/test/test-table.rb
# new file mode 100644
# index 000000000..615a90c2f
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-table.rb
# @@ -0,0 +1,290 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::Table: the constructor variants, column manipulation,
# printing, concatenation, slicing, chunk combination and Feather output.
class TestTable < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case(".new") do
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
    end

    # Flattens a table into [[column name, [values...]], ...] so the
    # constructor tests can compare against plain Ruby arrays. Values from
    # all chunks of a column are concatenated in chunk order.
    def dump_table(table)
      table.n_columns.times.map do |column_index|
        field = table.schema.get_field(column_index)
        chunked_array = table.get_column_data(column_index)
        values = chunked_array.chunks.flat_map do |chunk|
          chunk.length.times.map { |row| chunk.get_value(row) }
        end
        [field.name, values]
      end
    end

    # Columns given as plain arrays.
    def test_arrays
      require_gi_bindings(3, 3, 1)
      arrays = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      table = Arrow::Table.new(@schema, arrays)
      expected = [
        ["visible", [true]],
        ["valid", [false]],
      ]
      assert_equal(expected, dump_table(table))
    end

    # Columns given as chunked arrays of two chunks each.
    def test_chunked_arrays
      require_gi_bindings(3, 3, 1)
      visible_column = Arrow::ChunkedArray.new([build_boolean_array([true]),
                                                build_boolean_array([false])])
      valid_column = Arrow::ChunkedArray.new([build_boolean_array([false]),
                                              build_boolean_array([true])])
      table = Arrow::Table.new(@schema, [visible_column, valid_column])
      expected = [
        ["visible", [true, false]],
        ["valid", [false, true]],
      ]
      assert_equal(expected, dump_table(table))
    end

    # Rows given as two single-row record batches.
    def test_record_batches
      require_gi_bindings(3, 3, 1)
      first_batch = build_record_batch({
                                         "visible" => build_boolean_array([true]),
                                         "valid" => build_boolean_array([false])
                                       })
      second_batch = build_record_batch({
                                          "visible" => build_boolean_array([false]),
                                          "valid" => build_boolean_array([true])
                                        })
      table = Arrow::Table.new(@schema, [first_batch, second_batch])

      expected = [
        ["visible", [true, false]],
        ["valid", [false, true]],
      ]
      assert_equal(expected, dump_table(table))
    end
  end

  sub_test_case("instance methods") do
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
      @columns = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      @table = Arrow::Table.new(@schema, @columns)
    end

    def test_equal
      same_table = Arrow::Table.new(@schema, @columns)
      assert_equal(@table, same_table)
    end

    def test_equal_metadata
      same_table = Arrow::Table.new(@schema, @columns)
      assert { @table.equal_metadata(same_table, true) }
    end

    def test_schema
      field_names = @table.schema.fields.map(&:name)
      assert_equal(["visible", "valid"], field_names)
    end

    # Index -1 is expected to address the last column.
    def test_column_data
      expected = [
        Arrow::ChunkedArray.new([build_boolean_array([true])]),
        Arrow::ChunkedArray.new([build_boolean_array([false])]),
      ]
      actual = [
        @table.get_column_data(0),
        @table.get_column_data(-1),
      ]
      assert_equal(expected, actual)
    end

    def test_n_columns
      assert_equal(2, @table.n_columns)
    end

    def test_n_rows
      assert_equal(1, @table.n_rows)
    end

    # Inserting at index 1 places the new column between the existing two.
    def test_add_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.add_column(1, field, chunked_array)
      assert_equal(["visible", "added", "valid"],
                   new_table.schema.fields.map(&:name))
    end

    def test_remove_column
      new_table = @table.remove_column(0)
      assert_equal(["valid"],
                   new_table.schema.fields.map(&:name))
    end

    def test_replace_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.replace_column(0, field, chunked_array)
      assert_equal(["added", "valid"],
                   new_table.schema.fields.map(&:name))
    end

    # NOTE(review): the leading whitespace inside this heredoc is part of the
    # expected string; confirm it against the upstream test file and Arrow's
    # pretty-printer output before relying on it.
    def test_to_s
      table = build_table("valid" => build_boolean_array([true, false, true]))
      assert_equal(<<-TABLE, table.to_s)
valid: bool
----
valid:
  [
    [
      true,
      false,
      true
    ]
  ]
      TABLE
    end

    sub_test_case("#concatenate") do
      # Three tables with the same schema are appended row-wise.
      def test_without_options
        expected = build_table("visible" =>
                               build_boolean_array([true, false, true, false]))
        table1 = build_table("visible" => build_boolean_array([true]))
        table2 = build_table("visible" => build_boolean_array([false, true]))
        table3 = build_table("visible" => build_boolean_array([false]))
        assert_equal(expected, table1.concatenate([table2, table3]))
      end

      # With unify_schemas enabled, tables with different columns are
      # merged and the missing cells are null.
      def test_with_options
        options = Arrow::TableConcatenateOptions.new
        options.unify_schemas = true
        expected = build_table("a" => build_int32_array([1, nil, 3]),
                               "b" => build_int32_array([10, nil, 30]),
                               "c" => build_int32_array([nil, 200, nil]))
        table1 = build_table("a" => build_int32_array([1]),
                             "b" => build_int32_array([10]))
        table2 = build_table("c" => build_int32_array([200]))
        table3 = build_table("a" => build_int32_array([3]),
                             "b" => build_int32_array([30]))
        assert_equal(expected, table1.concatenate([table2, table3], options))
      end
    end

    sub_test_case("#slice") do
      test("offset: positive") do
        source = build_table("visible" => build_boolean_array([true, false, true]))
        expected = build_table("visible" => build_boolean_array([false, true]))
        assert_equal(expected, source.slice(1, 2))
      end

      # A negative offset is expected to count from the end of the table.
      test("offset: negative") do
        source = build_table("visible" => build_boolean_array([true, false, true]))
        expected = build_table("visible" => build_boolean_array([false, true]))
        assert_equal(expected, source.slice(-2, 2))
      end
    end

    # After combine_chunks the single column holds one chunk containing all
    # six values in their original order.
    def test_combine_chunks
      chunks = [
        build_boolean_array([true, false, true]),
        build_boolean_array([false, true]),
        build_boolean_array([false]),
      ]
      table = build_table("visible" => Arrow::ChunkedArray::new(chunks))
      combined_table = table.combine_chunks
      all_values = combined_table.n_columns.times.map do |column_index|
        column = combined_table.get_column_data(column_index)
        column.n_chunks.times.map do |chunk_index|
          column.get_chunk(chunk_index).values
        end
      end
      assert_equal([[[true, false, true, false, true, false]]],
                   all_values)
    end

    sub_test_case("#write_as_feather") do
      # Wraps each test in a temporary file that is closed and removed
      # afterwards, even if the test fails.
      def setup
        super
        @tempfile = Tempfile.open("arrow-table-write-as-feather")
        begin
          yield
        ensure
          @tempfile.close!
        end
      end

      # Reads the temporary file back as a Feather file and yields the
      # resulting table; the input stream is closed afterwards.
      def read_feather
        input = Arrow::MemoryMappedInputStream.new(@tempfile.path)
        reader = Arrow::FeatherFileReader.new(input)
        begin
          yield(reader.read)
        ensure
          input.close
        end
      end

      test("default") do
        output = Arrow::FileOutputStream.new(@tempfile.path, false)
        @table.write_as_feather(output)
        output.close

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end

      test("compression") do
        output = Arrow::FileOutputStream.new(@tempfile.path, false)
        properties = Arrow::FeatherWriteProperties.new
        properties.compression = :zstd
        @table.write_as_feather(output, properties)
        output.close

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-take.rb b/src/arrow/c_glib/test/test-take.rb
# new file mode 100644
# index 000000000..f97c7ad73
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-take.rb
# @@ -0,0 +1,214 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for the "take" operation (selecting elements by index) on arrays,
# tables, chunked arrays and record batches. Each group checks plain
# indices, a null index, an out-of-range index and, where the source has
# one, chunked-array indices.
class TestTake < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case("Array") do
    def test_no_null
      indices = build_int16_array([1, 0, 2])
      source = build_int16_array([1, 0, 2])
      assert_equal(build_int16_array([0, 1, 2]),
                   source.take(indices))
    end

    # A null index produces a null in the result.
    def test_null
      indices = build_int16_array([2, nil, 0])
      source = build_int16_array([1, 0, 2])
      assert_equal(build_int16_array([2, nil, 1]),
                   source.take(indices))
    end

    # Index 3 is past the end of a three-element array.
    def test_out_of_index
      indices = build_int16_array([1, 2, 3])
      assert_raise(Arrow::Error::Index) do
        build_int16_array([0, 1, 2]).take(indices)
      end
    end

    def test_chunked_array
      expected = Arrow::ChunkedArray.new([
                                           build_int16_array([0, 1]),
                                           build_int16_array([2])
                                         ])
      indices = Arrow::ChunkedArray.new([
                                          build_int16_array([1, 0]),
                                          build_int16_array([2])
                                        ])
      source = build_int16_array([1, 0, 2])
      assert_equal(expected,
                   source.take_chunked_array(indices))
    end
  end

  sub_test_case("Table") do
    def setup
      fields = [
        Arrow::Field.new("field1", Arrow::Int16DataType.new),
        Arrow::Field.new("field2", Arrow::Int16DataType.new)
      ]
      @schema = Arrow::Schema.new(fields)
      arrays = [
        build_int16_array([0, 1, 2]),
        build_int16_array([3, 5, 4])
      ]
      @table = Arrow::Table.new(@schema, arrays)
    end

    def test_no_null
      expected_columns = [
        build_int16_array([1, 0, 2]),
        build_int16_array([5, 3, 4])
      ]
      expected = Arrow::Table.new(@schema, expected_columns)
      indices = build_int16_array([1, 0, 2])
      assert_equal(expected,
                   @table.take(indices))
    end

    def test_null
      expected_columns = [
        build_int16_array([2, nil, 0]),
        build_int16_array([4, nil, 3])
      ]
      expected = Arrow::Table.new(@schema, expected_columns)
      indices = build_int16_array([2, nil, 0])
      assert_equal(expected,
                   @table.take(indices))
    end

    def test_out_of_index
      indices = build_int16_array([1, 2, 3])
      assert_raise(Arrow::Error::Index) do
        @table.take(indices)
      end
    end

    def test_chunked_array
      expected_columns = [
        build_int16_array([1, 0, 2]),
        build_int16_array([5, 3, 4])
      ]
      expected = Arrow::Table.new(@schema, expected_columns)
      index_chunks = [
        build_int16_array([1, 0]),
        build_int16_array([2])
      ]
      indices = Arrow::ChunkedArray.new(index_chunks)
      assert_equal(expected,
                   @table.take_chunked_array(indices))
    end
  end

  sub_test_case("ChunkedArray") do
    def setup
      chunks = [
        build_int16_array([1, 0]),
        build_int16_array([2]),
      ]
      @chunked_array = Arrow::ChunkedArray.new(chunks)
    end

    def test_no_null
      expected_chunks = [
        build_int16_array([0, 1]),
        build_int16_array([2])
      ]
      expected = Arrow::ChunkedArray.new(expected_chunks)
      indices = build_int16_array([1, 0, 2])
      assert_equal(expected,
                   @chunked_array.take(indices))
    end

    def test_null
      expected_chunks = [
        build_int16_array([2, nil]),
        build_int16_array([1])
      ]
      expected = Arrow::ChunkedArray.new(expected_chunks)
      indices = build_int16_array([2, nil, 0])
      assert_equal(expected,
                   @chunked_array.take(indices))
    end

    def test_out_of_index
      indices = build_int16_array([1, 2, 3])
      assert_raise(Arrow::Error::Index) do
        @chunked_array.take(indices)
      end
    end

    def test_chunked_array
      expected_chunks = [
        build_int16_array([0, 1]),
        build_int16_array([2])
      ]
      expected = Arrow::ChunkedArray.new(expected_chunks)
      index_chunks = [
        build_int16_array([1, 0]),
        build_int16_array([2])
      ]
      indices = Arrow::ChunkedArray.new(index_chunks)
      assert_equal(expected,
                   @chunked_array.take_chunked_array(indices))
    end
  end

  sub_test_case("RecordBatch") do
    def setup
      fields = [
        Arrow::Field.new("field1", Arrow::Int16DataType.new),
        Arrow::Field.new("field2", Arrow::Int16DataType.new)
      ]
      @schema = Arrow::Schema.new(fields)
      columns = [
        build_int16_array([1, 0, 2]),
        build_int16_array([3, 5, 4])
      ]
      @record_batch = Arrow::RecordBatch.new(@schema, 3, columns)
    end

    def test_no_null
      expected_columns = [
        build_int16_array([0, 1, 2]),
        build_int16_array([5, 3, 4])
      ]
      expected = Arrow::RecordBatch.new(@schema, 3, expected_columns)
      indices = build_int16_array([1, 0, 2])
      assert_equal(expected,
                   @record_batch.take(indices))
    end

    def test_null
      expected_columns = [
        build_int16_array([2, nil, 1]),
        build_int16_array([4, nil, 3])
      ]
      expected = Arrow::RecordBatch.new(@schema, 3, expected_columns)
      indices = build_int16_array([2, nil, 0])
      assert_equal(expected,
                   @record_batch.take(indices))
    end

    def test_out_of_index
      indices = build_int16_array([1, 2, 3])
      assert_raise(Arrow::Error::Index) do
        @record_batch.take(indices)
      end
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-tensor.rb b/src/arrow/c_glib/test/test-tensor.rb
# new file mode 100644
# index 000000000..31f2556c4
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-tensor.rb
# @@ -0,0 +1,125 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::Tensor using a 3x2x2 int8 tensor holding 1..12 with
# dimension names "a", "b", "c" and no explicit strides.
class TestTensor < Test::Unit::TestCase
  include Helper::Omittable

  def setup
    # Laid out as three 2x2 groups to mirror @shape.
    @raw_data = [
      1, 2,
      3, 4,

      5, 6,
      7, 8,

      9, 10,
      11, 12,
    ]
    @shape = [3, 2, 2]
    @tensor = Arrow::Tensor.new(Arrow::Int8DataType.new,
                                Arrow::Buffer.new(@raw_data.pack("c*")),
                                @shape,
                                [],
                                ["a", "b", "c"])
  end

  # A tensor rebuilt from the same bytes, shape and names compares equal.
  def test_equal
    other_data = Arrow::Buffer.new(@raw_data.pack("c*"))
    other_tensor = Arrow::Tensor.new(Arrow::Int8DataType.new,
                                     other_data,
                                     @shape,
                                     [],
                                     ["a", "b", "c"])
    assert_equal(@tensor, other_tensor)
  end

  def test_value_data_type
    assert_equal(Arrow::Int8DataType, @tensor.value_data_type.class)
  end

  def test_value_type
    assert_equal(Arrow::Type::INT8, @tensor.value_type)
  end

  def test_buffer
    stored_values = @tensor.buffer.data.to_s.unpack("c*")
    assert_equal(@raw_data, stored_values)
  end

  def test_shape
    require_gi_bindings(3, 3, 1)
    assert_equal(@shape, @tensor.shape)
  end

  # Although setup passes an empty strides list, the tensor reports
  # [4, 2, 1].
  def test_strides
    require_gi_bindings(3, 3, 1)
    assert_equal([4, 2, 1], @tensor.strides)
  end

  def test_n_dimensions
    assert_equal(@shape.size, @tensor.n_dimensions)
  end

  def test_dimension_name
    dimension_names = @tensor.n_dimensions.times.map do |dimension|
      @tensor.get_dimension_name(dimension)
    end
    assert_equal(["a", "b", "c"], dimension_names)
  end

  def test_size
    assert_equal(@raw_data.size, @tensor.size)
  end

  def test_mutable?
    assert { not @tensor.mutable? }
  end

  def test_contiguous?
    assert { @tensor.contiguous? }
  end

  def test_row_major?
    assert { @tensor.row_major? }
  end

  def test_column_major?
    assert { not @tensor.column_major? }
  end

  # Round trip: write the tensor to a resizable buffer, then read it back.
  def test_io
    buffer = Arrow::ResizableBuffer.new(0)
    output = Arrow::BufferOutputStream.new(buffer)
    output.write_tensor(@tensor)
    input = Arrow::BufferInputStream.new(buffer)
    assert_equal(@tensor, input.read_tensor)
  end
end
# diff --git a/src/arrow/c_glib/test/test-time-data-type.rb b/src/arrow/c_glib/test/test-time-data-type.rb
# new file mode 100644
# index 000000000..a7b1503fd
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-time-data-type.rb
# @@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that Arrow::TimeDataType is registered as an abstract GType.
class TestTimeDataType < Test::Unit::TestCase
  def test_type
    assert { Arrow::TimeDataType.gtype.abstract? }
  end
end
# diff --git a/src/arrow/c_glib/test/test-time32-array.rb b/src/arrow/c_glib/test/test-time32-array.rb
# new file mode 100644
# index 000000000..b648c69f8
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-time32-array.rb
# @@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::Time32Array with values expressed in seconds.
class TestTime32Array < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  # An array built from a raw 32-bit data buffer plus a bitmap buffer must
  # equal the helper-built array [0, 600, nil].
  def test_new
    raw_data = [
      0,       # 00:00:00
      60 * 10, # 00:10:00
    ]
    data_type = Arrow::Time32DataType.new(:second)
    expected = build_time32_array(:second, [*raw_data, nil])
    actual = Arrow::Time32Array.new(data_type,
                                    3,
                                    Arrow::Buffer.new(raw_data.pack("l*")),
                                    Arrow::Buffer.new([0b011].pack("C*")),
                                    -1)
    assert_equal(expected, actual)
  end

  def test_buffer
    raw_data = [
      0,       # 00:00:00
      60 * 10, # 00:10:00
    ]
    array = build_time32_array(:second, raw_data)
    assert_equal(raw_data.pack("l*"),
                 array.buffer.data.to_s)
  end

  def test_value
    ten_past_midnight = 60 * 10 # 00:10:00
    array = build_time32_array(:second, [ten_past_midnight])
    assert_equal(ten_past_midnight, array.get_value(0))
  end

  def test_values
    raw_data = [
      0,       # 00:00:00
      60 * 10, # 00:10:00
    ]
    array = build_time32_array(:second, raw_data)
    assert_equal(raw_data, array.values)
  end

  sub_test_case("unit") do
    def test_second
      array = build_time32_array(:second, [])
      assert_equal(Arrow::TimeUnit::SECOND, array.value_data_type.unit)
    end

    def test_milli
      array = build_time32_array(:milli, [])
      assert_equal(Arrow::TimeUnit::MILLI, array.value_data_type.unit)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-time32-data-type.rb b/src/arrow/c_glib/test/test-time32-data-type.rb
# new file mode 100644
# index 000000000..6ecf327fe
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-time32-data-type.rb
# @@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::Time32DataType: type id, name, unit validation and the
# string form for each accepted unit.
class TestTime32DataType < Test::Unit::TestCase
  def test_type
    assert_equal(Arrow::Type::TIME32,
                 Arrow::Time32DataType.new(:second).id)
  end

  def test_name
    assert_equal("time32",
                 Arrow::Time32DataType.new(:second).name)
  end

  # :micro is rejected with an Arrow::Error::Invalid carrying this exact
  # message.
  def test_invalid_unit
    expected_error = Arrow::Error::Invalid.new(
      "[time32-data-type][new] time unit must be second or milli: <micro>"
    )
    assert_raise(expected_error) do
      Arrow::Time32DataType.new(:micro)
    end
  end

  sub_test_case("second") do
    def setup
      @data_type = Arrow::Time32DataType.new(:second)
    end

    def test_to_s
      assert_equal("time32[s]", @data_type.to_s)
    end
  end

  sub_test_case("milli") do
    def setup
      @data_type = Arrow::Time32DataType.new(:milli)
    end

    def test_to_s
      assert_equal("time32[ms]", @data_type.to_s)
    end
  end
end
# diff --git a/src/arrow/c_glib/test/test-time32-scalar.rb b/src/arrow/c_glib/test/test-time32-scalar.rb
# new file mode 100644
# index 000000000..94c0a7592
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-time32-scalar.rb
# @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Tests for Arrow::Time32Scalar holding 600 seconds (00:10:00).
class TestTime32Scalar < Test::Unit::TestCase
  def setup
    @data_type = Arrow::Time32DataType.new(:second)
    @value = 60 * 10 # 00:10:00
    @scalar = Arrow::Time32Scalar.new(@data_type, @value)
  end

  def test_data_type
    assert_equal(@data_type, @scalar.data_type)
  end

  def test_valid?
    assert { @scalar.valid? }
  end

  # A second scalar built from the same data type and value compares equal.
  def test_equal
    same_scalar = Arrow::Time32Scalar.new(@data_type, @value)
    assert_equal(same_scalar, @scalar)
  end

  def test_to_s
    assert_equal("00:10:00", @scalar.to_s)
  end

  def test_value
    assert_equal(@value, @scalar.value)
  end
end
# diff --git a/src/arrow/c_glib/test/test-time64-array.rb b/src/arrow/c_glib/test/test-time64-array.rb
# new file mode 100644
# index 000000000..775d3153a
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-time64-array.rb
# @@ -0,0 +1,57 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TestTime64Array < Test::Unit::TestCase # Checks construction, raw buffer contents and value access of Arrow::Time64Array in the :micro unit. + include Helper::Buildable + include Helper::Omittable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + midnight = 0 + after_midnight = 60 * 10 * 1000 * 1000 # 00:10:00.000000 + raw_data = [midnight, after_midnight] + data_type = Arrow::Time64DataType.new(:micro) + assert_equal(build_time64_array(:micro, [*raw_data, nil]), + Arrow::Time64Array.new(data_type, + 3, + Arrow::Buffer.new(raw_data.pack("q*")), # "q*" packs each value as a signed 64-bit integer in native byte order + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + midnight = 0 + after_midnight = 60 * 10 * 1000 * 1000 # 00:10:00.000000 + raw_data = [midnight, after_midnight] + array = build_time64_array(:micro, raw_data) + assert_equal(raw_data.pack("q*"), + array.buffer.data.to_s) + end + + def test_value + after_midnight = 60 * 10 * 1000 * 1000 # 00:10:00.000000 + array = build_time64_array(:micro, [after_midnight]) + assert_equal(after_midnight, array.get_value(0)) + end + + def test_values + midnight = 0 + after_midnight = 60 * 10 * 1000 * 1000 # 00:10:00.000000 + raw_data = [midnight, after_midnight] + array = build_time64_array(:micro, raw_data) + assert_equal(raw_data, array.values) + end +end diff --git a/src/arrow/c_glib/test/test-time64-data-type.rb b/src/arrow/c_glib/test/test-time64-data-type.rb new file mode 100644 index 000000000..812e9220b --- /dev/null +++ b/src/arrow/c_glib/test/test-time64-data-type.rb @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTime64DataType < Test::Unit::TestCase # Checks Arrow::Time64DataType: type id, name, unit validation and to_s for the :micro and :nano units. + def test_type + data_type = Arrow::Time64DataType.new(:micro) + assert_equal(Arrow::Type::TIME64, data_type.id) + end + + def test_name + data_type = Arrow::Time64DataType.new(:micro) + assert_equal("time64", data_type.name) + end + + def test_invalid_unit # :second must be rejected; the expected message names only micro and nano. + message = + "[time64-data-type][new] time unit must be micro or nano: <second>" + assert_raise(Arrow::Error::Invalid.new(message)) do + Arrow::Time64DataType.new(:second) + end + end + + sub_test_case("micro") do + def setup + @data_type = Arrow::Time64DataType.new(:micro) + end + + def test_to_s + assert_equal("time64[us]", @data_type.to_s) + end + end + + sub_test_case("nano") do + def setup + @data_type = Arrow::Time64DataType.new(:nano) + end + + def test_to_s + assert_equal("time64[ns]", @data_type.to_s) + end + end +end diff --git a/src/arrow/c_glib/test/test-time64-scalar.rb b/src/arrow/c_glib/test/test-time64-scalar.rb new file mode 100644 index 000000000..fb2843ca6 --- /dev/null +++ b/src/arrow/c_glib/test/test-time64-scalar.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTime64Scalar < Test::Unit::TestCase # Checks the accessors of an Arrow::Time64Scalar built from a :micro data type and the value 600000000. + def setup + @data_type = Arrow::Time64DataType.new(:micro) + @value = 60 * 10 * 1000 * 1000 # 00:10:00.000000 + @scalar = Arrow::Time64Scalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal # A second scalar built from the same data type and value must compare equal. + assert_equal(Arrow::Time64Scalar.new(@data_type, @value), + @scalar) + end + + def test_to_s + assert_equal("00:10:00.000000", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-timestamp-array.rb b/src/arrow/c_glib/test/test-timestamp-array.rb new file mode 100644 index 000000000..793402323 --- /dev/null +++ b/src/arrow/c_glib/test/test-timestamp-array.rb @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTimestampArray < Test::Unit::TestCase # Checks construction, raw buffer contents and value access of Arrow::TimestampArray in the :milli unit. + include Helper::Buildable + include Helper::Omittable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + epoch = 0 + after_epoch = 1504953190854 # 2017-09-09T10:33:10.854Z + raw_data = [epoch, after_epoch] + data_type = Arrow::TimestampDataType.new(:milli) + assert_equal(build_timestamp_array(:milli, [*raw_data, nil]), + Arrow::TimestampArray.new(data_type, + 3, + Arrow::Buffer.new(raw_data.pack("q*")), # "q*" packs each value as a signed 64-bit integer in native byte order + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + epoch = 0 + after_epoch = 1504953190854 # 2017-09-09T10:33:10.854Z + raw_data = [epoch, after_epoch] + array = build_timestamp_array(:milli, raw_data) + assert_equal(raw_data.pack("q*"), + array.buffer.data.to_s) + end + + def test_value + after_epoch = 1504953190854 # 2017-09-09T10:33:10.854Z + array = build_timestamp_array(:milli, [after_epoch]) + assert_equal(after_epoch, array.get_value(0)) + end + + def test_values + epoch = 0 + after_epoch = 1504953190854 # 2017-09-09T10:33:10.854Z + raw_data = [epoch, after_epoch] + array = build_timestamp_array(:milli, raw_data) + assert_equal(raw_data, array.values) + end +end diff --git a/src/arrow/c_glib/test/test-timestamp-data-type.rb b/src/arrow/c_glib/test/test-timestamp-data-type.rb new file mode 100644 index 000000000..dac3a9bc6 --- /dev/null +++ b/src/arrow/c_glib/test/test-timestamp-data-type.rb @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTimestampDataType < Test::Unit::TestCase # Checks Arrow::TimestampDataType: type id, name, and the to_s text and unit for each of the four time units. + def test_type + data_type = Arrow::TimestampDataType.new(:micro) + assert_equal(Arrow::Type::TIMESTAMP, data_type.id) + end + + def test_name + data_type = Arrow::TimestampDataType.new(:micro) + assert_equal("timestamp", data_type.name) + end + + sub_test_case("second") do + def setup + @data_type = Arrow::TimestampDataType.new(:second) + end + + def test_to_s + assert_equal("timestamp[s]", @data_type.to_s) + end + + def test_unit + assert_equal(Arrow::TimeUnit::SECOND, @data_type.unit) + end + end + + sub_test_case("millisecond") do + def setup + @data_type = Arrow::TimestampDataType.new(:milli) + end + + def test_to_s + assert_equal("timestamp[ms]", @data_type.to_s) + end + + def test_unit + assert_equal(Arrow::TimeUnit::MILLI, @data_type.unit) + end + end + + sub_test_case("micro") do + def setup + @data_type = Arrow::TimestampDataType.new(:micro) + end + + def test_to_s + assert_equal("timestamp[us]", @data_type.to_s) + end + + def test_unit + assert_equal(Arrow::TimeUnit::MICRO, @data_type.unit) + end + end + + sub_test_case("nano") do + def setup + @data_type = Arrow::TimestampDataType.new(:nano) + end + + def test_to_s + assert_equal("timestamp[ns]", @data_type.to_s) + end + + def test_unit + assert_equal(Arrow::TimeUnit::NANO, @data_type.unit) + end + end +end diff --git a/src/arrow/c_glib/test/test-timestamp-scalar.rb b/src/arrow/c_glib/test/test-timestamp-scalar.rb new file mode 100644 index 000000000..9aa676b5d --- /dev/null +++ b/src/arrow/c_glib/test/test-timestamp-scalar.rb @@ -0,0 +1,48
@@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTimestampScalar < Test::Unit::TestCase # Checks the accessors of an Arrow::TimestampScalar built from a :milli data type and a fixed value. + def setup + @data_type = Arrow::TimestampDataType.new(:milli) + @value = 1504953190854 # 2017-09-09T10:33:10.854Z + @scalar = Arrow::TimestampScalar.new(@data_type, @value) + end + + def test_data_type + assert_equal(@data_type, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal # A second scalar built from the same data type and value must compare equal. + assert_equal(Arrow::TimestampScalar.new(@data_type, @value), + @scalar) + end + + def test_to_s + assert_equal("2017-09-09 10:33:10.854", @scalar.to_s) + end + + def test_value + assert_equal(@value, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-uint-array-builder.rb b/src/arrow/c_glib/test/test-uint-array-builder.rb new file mode 100644 index 000000000..89621189b --- /dev/null +++ b/src/arrow/c_glib/test/test-uint-array-builder.rb @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUIntArrayBuilder < Test::Unit::TestCase # Checks that build_uint_array yields a UInt8, UInt16, UInt32 or UInt64 array depending on the largest input value. + include Helper::Buildable + + def test_uint8 + values = [0, 2] + assert_equal(build_uint_array([*values, nil]), + Arrow::UInt8Array.new(3, + Arrow::Buffer.new(values.pack("C*")), + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_uint16 + border_value = 2 ** 8 # 256, one more than the largest 8-bit unsigned value + values = [0, border_value] + assert_equal(build_uint_array([*values, nil]), + Arrow::UInt16Array.new(3, + Arrow::Buffer.new(values.pack("S*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_uint32 + border_value = 2 ** 16 # 65536, one more than the largest 16-bit unsigned value + values = [0, border_value] + assert_equal(build_uint_array([*values, nil]), + Arrow::UInt32Array.new(3, + Arrow::Buffer.new(values.pack("L*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end + + def test_uint64 + border_value = 2 ** 32 # 4294967296, one more than the largest 32-bit unsigned value + values = [0, border_value] + assert_equal(build_uint_array([*values, nil]), + Arrow::UInt64Array.new(3, + Arrow::Buffer.new(values.pack("Q*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) + end +end diff --git a/src/arrow/c_glib/test/test-uint16-array.rb b/src/arrow/c_glib/test/test-uint16-array.rb new file mode 100644 index 000000000..a02f8338a --- /dev/null +++ b/src/arrow/c_glib/test/test-uint16-array.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt16Array < Test::Unit::TestCase # Checks construction, raw buffer contents, value access and sum of Arrow::UInt16Array. + include Helper::Buildable + include Helper::Omittable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + assert_equal(build_uint16_array([1, 2, nil]), + Arrow::UInt16Array.new(3, + Arrow::Buffer.new([1, 2].pack("S*")), # "S*" packs each value as an unsigned 16-bit integer in native byte order + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + builder = Arrow::UInt16ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4].pack("S*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::UInt16ArrayBuilder.new + builder.append_value(1) + array = builder.finish + assert_equal(1, array.get_value(0)) + end + + def test_values + require_gi_bindings(3, 1, 7) # NOTE(review): presumably omits this test on bindings older than 3.1.7; confirm in Helper::Omittable + builder = Arrow::UInt16ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4], array.values) + end + + def test_sum + array = build_uint16_array([2, 4, nil]) # must be a UInt16 array so that this suite exercises UInt16Array#sum; the nil entry does not change the expected total of 6 + assert_equal(6, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-uint16-data-type.rb b/src/arrow/c_glib/test/test-uint16-data-type.rb new file mode 100644 index 000000000..e91489e08 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint16-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation
(ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt16DataType < Test::Unit::TestCase # Checks the id, name and to_s of Arrow::UInt16DataType. + def test_type + data_type = Arrow::UInt16DataType.new + assert_equal(Arrow::Type::UINT16, data_type.id) + end + + def test_name + data_type = Arrow::UInt16DataType.new + assert_equal("uint16", data_type.name) + end + + def test_to_s + data_type = Arrow::UInt16DataType.new + assert_equal("uint16", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-uint16-scalar.rb b/src/arrow/c_glib/test/test-uint16-scalar.rb new file mode 100644 index 000000000..000d620b3 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint16-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt16Scalar < Test::Unit::TestCase # Checks the accessors of an Arrow::UInt16Scalar holding 65535. + def setup + @scalar = Arrow::UInt16Scalar.new((2 ** 16) - 1) # 65535, the largest 16-bit unsigned value + end + + def test_data_type + assert_equal(Arrow::UInt16DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid? + end + end + + def test_equal # A second scalar built from the same value must compare equal. + assert_equal(Arrow::UInt16Scalar.new((2 ** 16) - 1), + @scalar) + end + + def test_to_s + assert_equal(((2 ** 16) - 1).to_s, @scalar.to_s) + end + + def test_value + assert_equal((2 ** 16) - 1, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-uint32-array.rb b/src/arrow/c_glib/test/test-uint32-array.rb new file mode 100644 index 000000000..04eb60b37 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint32-array.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +class TestUInt32Array < Test::Unit::TestCase # Checks construction, raw buffer contents, value access and sum of Arrow::UInt32Array. + include Helper::Buildable + include Helper::Omittable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + assert_equal(build_uint32_array([1, 2, nil]), + Arrow::UInt32Array.new(3, + Arrow::Buffer.new([1, 2].pack("L*")), # "L*" packs each value as an unsigned 32-bit integer in native byte order + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + builder = Arrow::UInt32ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4].pack("L*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::UInt32ArrayBuilder.new + builder.append_value(1) + array = builder.finish + assert_equal(1, array.get_value(0)) + end + + def test_values + require_gi_bindings(3, 1, 7) # NOTE(review): presumably omits this test on bindings older than 3.1.7; confirm in Helper::Omittable + builder = Arrow::UInt32ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4], array.values) + end + + def test_sum # The nil entry does not change the expected total of 6. + array = build_uint32_array([2, 4, nil]) + assert_equal(6, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-uint32-data-type.rb b/src/arrow/c_glib/test/test-uint32-data-type.rb new file mode 100644 index 000000000..dbe8c34a0 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint32-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt32DataType < Test::Unit::TestCase # Checks the id, name and to_s of Arrow::UInt32DataType. + def test_type + data_type = Arrow::UInt32DataType.new + assert_equal(Arrow::Type::UINT32, data_type.id) + end + + def test_name + data_type = Arrow::UInt32DataType.new + assert_equal("uint32", data_type.name) + end + + def test_to_s + data_type = Arrow::UInt32DataType.new + assert_equal("uint32", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-uint32-scalar.rb b/src/arrow/c_glib/test/test-uint32-scalar.rb new file mode 100644 index 000000000..c41f99330 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint32-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt32Scalar < Test::Unit::TestCase # Checks the accessors of an Arrow::UInt32Scalar holding 4294967295. + def setup + @scalar = Arrow::UInt32Scalar.new((2 ** 32) - 1) # 4294967295, the largest 32-bit unsigned value + end + + def test_data_type + assert_equal(Arrow::UInt32DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid?
+ end + end + + def test_equal # A second scalar built from the same value must compare equal. + assert_equal(Arrow::UInt32Scalar.new((2 ** 32) - 1), + @scalar) + end + + def test_to_s + assert_equal(((2 ** 32) - 1).to_s, @scalar.to_s) + end + + def test_value + assert_equal((2 ** 32) - 1, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-uint64-array.rb b/src/arrow/c_glib/test/test-uint64-array.rb new file mode 100644 index 000000000..f34e6813e --- /dev/null +++ b/src/arrow/c_glib/test/test-uint64-array.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +class TestUInt64Array < Test::Unit::TestCase # Checks construction, raw buffer contents, value access and sum of Arrow::UInt64Array. + include Helper::Buildable + include Helper::Omittable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + assert_equal(build_uint64_array([1, 2, nil]), + Arrow::UInt64Array.new(3, + Arrow::Buffer.new([1, 2].pack("Q*")), # "Q*" packs each value as an unsigned 64-bit integer in native byte order + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + builder = Arrow::UInt64ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4].pack("Q*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::UInt64ArrayBuilder.new + builder.append_value(1) + array = builder.finish + assert_equal(1, array.get_value(0)) + end + + def test_values + require_gi_bindings(3, 1, 7) # NOTE(review): presumably omits this test on bindings older than 3.1.7; confirm in Helper::Omittable + builder = Arrow::UInt64ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4], array.values) + end + + def test_sum # The nil entry does not change the expected total of 6. + array = build_uint64_array([2, 4, nil]) + assert_equal(6, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-uint64-data-type.rb b/src/arrow/c_glib/test/test-uint64-data-type.rb new file mode 100644 index 000000000..bd53b48bc --- /dev/null +++ b/src/arrow/c_glib/test/test-uint64-data-type.rb @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt64DataType < Test::Unit::TestCase # Checks the id, name and to_s of Arrow::UInt64DataType. + def test_type + data_type = Arrow::UInt64DataType.new + assert_equal(Arrow::Type::UINT64, data_type.id) + end + + def test_name + data_type = Arrow::UInt64DataType.new + assert_equal("uint64", data_type.name) + end + + def test_to_s + data_type = Arrow::UInt64DataType.new + assert_equal("uint64", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-uint64-scalar.rb b/src/arrow/c_glib/test/test-uint64-scalar.rb new file mode 100644 index 000000000..19c12461c --- /dev/null +++ b/src/arrow/c_glib/test/test-uint64-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt64Scalar < Test::Unit::TestCase # Checks the accessors of an Arrow::UInt64Scalar holding 18446744073709551615. + def setup + @scalar = Arrow::UInt64Scalar.new((2 ** 64) - 1) # 18446744073709551615, the largest 64-bit unsigned value + end + + def test_data_type + assert_equal(Arrow::UInt64DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid?
+ end + end + + def test_equal # A second scalar built from the same value must compare equal. + assert_equal(Arrow::UInt64Scalar.new((2 ** 64) - 1), + @scalar) + end + + def test_to_s + assert_equal(((2 ** 64) - 1).to_s, @scalar.to_s) + end + + def test_value + assert_equal((2 ** 64) - 1, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-uint8-array.rb b/src/arrow/c_glib/test/test-uint8-array.rb new file mode 100644 index 000000000..e22b1b12c --- /dev/null +++ b/src/arrow/c_glib/test/test-uint8-array.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +class TestUInt8Array < Test::Unit::TestCase # Checks construction, raw buffer contents, value access and sum of Arrow::UInt8Array. + include Helper::Buildable + + def test_new # The helper-built array must equal one assembled from a raw data buffer and a one-byte bitmap. + assert_equal(build_uint8_array([1, 2, nil]), + Arrow::UInt8Array.new(3, + Arrow::Buffer.new([1, 2].pack("C*")), # "C*" packs each value as an unsigned 8-bit integer + Arrow::Buffer.new([0b011].pack("C*")), # one byte with bits 0 and 1 set; presumably the validity bitmap for two values followed by a null (TODO confirm) + -1)) # NOTE(review): -1 presumably asks Arrow to count the nulls itself; confirm + end + + def test_buffer + builder = Arrow::UInt8ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4].pack("C*"), array.buffer.data.to_s) + end + + def test_value + builder = Arrow::UInt8ArrayBuilder.new + builder.append_value(1) + array = builder.finish + assert_equal(1, array.get_value(0)) + end + + def test_values + builder = Arrow::UInt8ArrayBuilder.new + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) + array = builder.finish + assert_equal([1, 2, 4], array.values) + end + + def test_sum # The nil entry does not change the expected total of 6. + array = build_uint8_array([2, 4, nil]) + assert_equal(6, array.sum) + end +end diff --git a/src/arrow/c_glib/test/test-uint8-data-type.rb b/src/arrow/c_glib/test/test-uint8-data-type.rb new file mode 100644 index 000000000..cf8f89dbd --- /dev/null +++ b/src/arrow/c_glib/test/test-uint8-data-type.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +class TestUInt8DataType < Test::Unit::TestCase # Checks the id, signedness, name and to_s of Arrow::UInt8DataType. + def test_type + data_type = Arrow::UInt8DataType.new + assert_equal(Arrow::Type::UINT8, data_type.id) + end + + def test_signed? # The unsigned type must report signed? as false. + data_type = Arrow::UInt8DataType.new + assert do + not data_type.signed? + end + end + + def test_name + data_type = Arrow::UInt8DataType.new + assert_equal("uint8", data_type.name) + end + + def test_to_s + data_type = Arrow::UInt8DataType.new + assert_equal("uint8", data_type.to_s) + end +end diff --git a/src/arrow/c_glib/test/test-uint8-scalar.rb b/src/arrow/c_glib/test/test-uint8-scalar.rb new file mode 100644 index 000000000..54bc1c954 --- /dev/null +++ b/src/arrow/c_glib/test/test-uint8-scalar.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUInt8Scalar < Test::Unit::TestCase # Checks the accessors of an Arrow::UInt8Scalar holding 255. + def setup + @scalar = Arrow::UInt8Scalar.new((2 ** 8) - 1) # 255, the largest 8-bit unsigned value + end + + def test_data_type + assert_equal(Arrow::UInt8DataType.new, + @scalar.data_type) + end + + def test_valid? + assert do + @scalar.valid?
+ end + end + + def test_equal # A second scalar built from the same value must compare equal. + assert_equal(Arrow::UInt8Scalar.new((2 ** 8) - 1), + @scalar) + end + + def test_to_s + assert_equal(((2 ** 8) - 1).to_s, @scalar.to_s) + end + + def test_value + assert_equal((2 ** 8) - 1, @scalar.value) + end +end diff --git a/src/arrow/c_glib/test/test-unique.rb b/src/arrow/c_glib/test/test-unique.rb new file mode 100644 index 000000000..b94ff462b --- /dev/null +++ b/src/arrow/c_glib/test/test-unique.rb @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestUnique < Test::Unit::TestCase # Checks unique on int32 and string arrays; the expected arrays keep one copy of each value in first-seen order. + include Helper::Buildable + include Helper::Omittable + + def test_int32 + assert_equal(build_int32_array([1, 3, -1, -3]), + build_int32_array([1, 3, 1, -1, -3, -1]).unique) + end + + def test_string + assert_equal(build_string_array(["Ruby", "Python"]), + build_string_array(["Ruby", "Python", "Ruby"]).unique) + end +end diff --git a/src/arrow/c_glib/test/test-variance-options.rb b/src/arrow/c_glib/test/test-variance-options.rb new file mode 100644 index 000000000..64bdf670b --- /dev/null +++ b/src/arrow/c_glib/test/test-variance-options.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the default value and the write/read round trip of the ddof,
# skip_nulls and min_count properties of Arrow::VarianceOptions.
class TestVarianceOptions < Test::Unit::TestCase
  include Helper::Buildable

  # Every test starts from a freshly constructed options object.
  def setup
    @options = Arrow::VarianceOptions.new
  end

  # ddof starts at 0 and reads back 1 after being assigned 1.
  def test_ddof
    assert_equal(0, @options.ddof)
    @options.ddof = 1
    assert_equal(1, @options.ddof)
  end

  # skip_nulls starts truthy and reads back falsy after being assigned false.
  def test_skip_nulls
    assert { @options.skip_nulls? }
    @options.skip_nulls = false
    assert { !@options.skip_nulls? }
  end

  # min_count starts at 0 and reads back 1 after being assigned 1.
  def test_min_count
    assert_equal(0, @options.min_count)
    @options.min_count = 1
    assert_equal(1, @options.min_count)
  end
end

# Patch metadata for the next file, kept verbatim from the original diff
# (the hunk line count describes the upstream file, not this listing):
# diff --git a/src/arrow/c_glib/test/test-write-options.rb b/src/arrow/c_glib/test/test-write-options.rb
# new file mode 100644
# index 000000000..c528ce673
# --- /dev/null
# +++ b/src/arrow/c_glib/test/test-write-options.rb
# @@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks, property by property, the default value of Arrow::WriteOptions
# and that a newly assigned value is read back through the accessor.
class TestWriteOptions < Test::Unit::TestCase
  # Every test starts from a freshly constructed options object.
  def setup
    @options = Arrow::WriteOptions.new
  end

  sub_test_case("allow-64bit") do
    # Falsy by default.
    def test_default
      assert { !@options.allow_64bit? }
    end

    # Reads back truthy after being assigned true.
    def test_accessor
      @options.allow_64bit = true
      assert { @options.allow_64bit? }
    end
  end

  sub_test_case("max-recursion-depth") do
    # 64 by default.
    def test_default
      assert_equal(64, @options.max_recursion_depth)
    end

    # Reads back 29 after being assigned 29.
    def test_accessor
      @options.max_recursion_depth = 29
      assert_equal(29, @options.max_recursion_depth)
    end
  end

  sub_test_case("alignment") do
    # 8 by default.
    def test_default
      assert_equal(8, @options.alignment)
    end

    # Reads back 64 after being assigned 64.
    def test_accessor
      @options.alignment = 64
      assert_equal(64, @options.alignment)
    end
  end

  sub_test_case("write-legacy-ipc-format") do
    # Falsy by default.
    def test_default
      assert { !@options.write_legacy_ipc_format? }
    end

    # Reads back truthy after being assigned true.
    def test_accessor
      @options.write_legacy_ipc_format = true
      assert { @options.write_legacy_ipc_format? }
    end
  end

  sub_test_case("codec") do
    # No codec is set by default.
    def test_default
      assert_nil(@options.codec)
    end

    # A codec created from :zstd reports the name "zstd" once assigned.
    def test_accessor
      @options.codec = Arrow::Codec.new(:zstd)
      assert_equal("zstd", @options.codec.name)
    end
  end

  sub_test_case("use-threads") do
    # Truthy by default.
    def test_default
      assert { @options.use_threads? }
    end

    # Reads back falsy after being assigned false.
    def test_accessor
      @options.use_threads = false
      assert { !@options.use_threads? }
    end
  end
end