diff options
Diffstat (limited to 'src/arrow/ruby/red-arrow')
225 files changed, 24186 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/.gitignore b/src/arrow/ruby/red-arrow/.gitignore new file mode 100644 index 000000000..3330f8657 --- /dev/null +++ b/src/arrow/ruby/red-arrow/.gitignore @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +/.yardoc/ +/Gemfile.lock +/doc/reference/ +/ext/arrow/Makefile +/ext/arrow/mkmf.log +/pkg/ diff --git a/src/arrow/ruby/red-arrow/.yardopts b/src/arrow/ruby/red-arrow/.yardopts new file mode 100644 index 000000000..67159b1dc --- /dev/null +++ b/src/arrow/ruby/red-arrow/.yardopts @@ -0,0 +1,6 @@ +--output-dir doc/reference +--markup markdown +--no-private +lib/**/*.rb +- +doc/text/* diff --git a/src/arrow/ruby/red-arrow/Gemfile b/src/arrow/ruby/red-arrow/Gemfile new file mode 100644 index 000000000..3907918c8 --- /dev/null +++ b/src/arrow/ruby/red-arrow/Gemfile @@ -0,0 +1,22 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +source "https://rubygems.org/" + +gemspec diff --git a/src/arrow/ruby/red-arrow/LICENSE.txt b/src/arrow/ruby/red-arrow/LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/src/arrow/ruby/red-arrow/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/arrow/ruby/red-arrow/NOTICE.txt b/src/arrow/ruby/red-arrow/NOTICE.txt new file mode 100644 index 000000000..e08aeda8a --- /dev/null +++ b/src/arrow/ruby/red-arrow/NOTICE.txt @@ -0,0 +1,2 @@ +Apache Arrow +Copyright 2016 The Apache Software Foundation diff --git a/src/arrow/ruby/red-arrow/README.md b/src/arrow/ruby/red-arrow/README.md new file mode 100644 index 000000000..4249eeae6 --- /dev/null +++ b/src/arrow/ruby/red-arrow/README.md @@ -0,0 +1,75 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Red Arrow - Apache Arrow Ruby + +Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject Introspection. 
+ +[Apache Arrow](https://arrow.apache.org/) is an in-memory columnar data store. It's used by many products for data analytics. + +[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is middleware for language bindings of C libraries. GObject Introspection can generate language bindings automatically at runtime. + +Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/master/c_glib) and the [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow. + +Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection. + +The gobject-introspection gem is the Ruby bindings of GObject Introspection. Red Arrow uses GObject Introspection via the gobject-introspection gem. + +## Install + +Install Apache Arrow GLib before installing Red Arrow. See [Apache Arrow install document](https://arrow.apache.org/install/) for details. + +Install Red Arrow after you install Apache Arrow GLib: + +```console +% gem install red-arrow +``` + +## Usage + +```ruby +require "arrow" + +table = Arrow::Table.load("/dev/shm/data.arrow") +# Process data in table +table.save("/dev/shm/data-processed.arrow") +``` + +## Development + +Note that you need to install Apache Arrow C++/GLib at master before preparing Red Arrow. See also: + + * For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html + * For Apache Arrow GLib: https://github.com/apache/arrow/blob/master/c_glib/README.md + +```console +$ cd ruby/red-arrow +$ bundle install +$ bundle exec rake test +``` + +### For macOS with Homebrew + +```console +$ cd ruby/red-arrow +$ bundle install +$ brew install apache-arrow --head +$ brew install apache-arrow-glib --head +$ bundle exec rake test +```
\ No newline at end of file diff --git a/src/arrow/ruby/red-arrow/Rakefile b/src/arrow/ruby/red-arrow/Rakefile new file mode 100644 index 000000000..dd2c310b6 --- /dev/null +++ b/src/arrow/ruby/red-arrow/Rakefile @@ -0,0 +1,100 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require "bundler/gem_helper" +require "rake/clean" +require "yard" + +base_dir = File.join(__dir__) + +helper = Bundler::GemHelper.new(base_dir) +helper.install +spec = helper.gemspec + +release_task = Rake::Task["release"] +release_task.prerequisites.replace(["build", "release:rubygem_push"]) + +def run_extconf(build_dir, extension_dir, *arguments) + cd(build_dir) do + ruby(File.join(extension_dir, "extconf.rb"), + *arguments) + end +end + +spec.extensions.each do |extension| + extension_dir = File.join(base_dir, File.dirname(extension)) + build_dir = ENV["BUILD_DIR"] + if build_dir + build_dir = File.join(build_dir, "red-arrow") + directory build_dir + else + build_dir = extension_dir + end + CLOBBER << File.join(build_dir, "Makefile") + CLOBBER << File.join(build_dir, "mkmf.log") + + makefile = File.join(build_dir, "Makefile") + file makefile => build_dir do + run_extconf(build_dir, extension_dir) + end + + desc "Configure" + task :configure => build_dir do + run_extconf(build_dir, extension_dir) + end + + desc "Compile" + task :compile => makefile do + cd(build_dir) do + sh("make") + end + end + + task :clean do + cd(build_dir) do + sh("make", "clean") if File.exist?("Makefile") + end + end +end + +desc "Run tests" +task :test do + cd(base_dir) do + ruby("test/run-test.rb") + end +end + +task default: :test + +desc "Run benchmarks" +task :benchmark do + benchmarks = if ENV["BENCHMARKS"] + ENV["BENCHMARKS"].split + else + FileList["benchmark/{,*/**/}*.yml"] + end + cd(base_dir) do + benchmarks.each do |benchmark| + sh("benchmark-driver", benchmark) + end + end +end + +YARD::Rake::YardocTask.new do |task| +end diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/boolean.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/boolean.yml new file mode 100644 index 000000000..5e2551e2c --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/boolean.yml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = :boolean + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map { Faker::Boolean.boolean } + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j][i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/decimal128.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/decimal128.yml new file mode 100644 index 000000000..367e7c713 --- /dev/null +++ 
b/src/arrow/ruby/red-arrow/benchmark/raw-records/decimal128.yml @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = Arrow::Decimal128DataType.new(10, 5) + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map do + Faker::Number.decimal(l_digits: 10, r_digits: 5) + end + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + x = columns[j][i] + record << BigDecimal(x.to_s) + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records() diff --git 
a/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml new file mode 100644 index 000000000..151bb412f --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = Arrow::DictionaryDataType.new(:int8, :string, true) + + fields = n_columns.times.map {|i| ["column_#{i}".to_sym, type] }.to_h + schema = Arrow::Schema.new(**fields) + dictionary = Arrow::StringArray.new( + 100.times.map { Faker::Book.genre }.uniq.sort + ) + indices = Arrow::Int8Array.new( + n_rows.times.map { + Faker::Number.within(range: 0 ... 
dictionary.length) + } + ) + arrays = n_columns.times.map do + Arrow::DictionaryArray.new( + type, + indices, + dictionary, + ) + end + record_batch = Arrow::RecordBatch.new(schema, n_rows, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j].data.indices[i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/int64.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/int64.yml new file mode 100644 index 000000000..bd03ab942 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/int64.yml @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = :int64 + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map do + Faker::Number.number(digits: 18).to_i + end + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j][i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/list.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/list.yml new file mode 100644 index 000000000..b9a526710 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/list.yml @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = Arrow::ListDataType.new(name: "values", type: :double) + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map do + n_elements = Faker::Number.within(range: 1 ... 100) + n_elements.times.map do + Faker::Number.normal(mean: 0, standard_deviation: 1e+6) + end + end + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j][i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/string.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/string.yml new file mode 100644 index 000000000..2854a376b --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/string.yml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = :string + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map { Faker::Name.name } + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j][i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/timestamp.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/timestamp.yml new file mode 100644 index 000000000..9b65b790a --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/timestamp.yml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_rows = 1000 + n_columns = 10 + type = Arrow::TimestampDataType.new(:micro) + base_timestamp = Time.at(Faker::Number.within(range: 0 ... 1_000_000_000)) + thirty_days_in_sec = 30*24*3600 + timestamp_range = { + from: base_timestamp - thirty_days_in_sec, + to: base_timestamp + thirty_days_in_sec, + } + + fields = {} + arrays = {} + n_columns.times do |i| + column_name = "column_#{i}" + fields[column_name] = type + arrays[column_name] = n_rows.times.map do + sec = Faker::Time.between(**timestamp_range).to_i # splat: from:/to: are keyword arguments (required on Ruby 3+) + micro = Faker::Number.within(range: 0 ... 
1_000_000) + sec * 1_000_000 + micro + end + end + record_batch = Arrow::RecordBatch.new(fields, arrays) + + def pure_ruby_raw_records(record_batch) + n_rows = record_batch.n_rows + n_columns = record_batch.n_columns + columns = record_batch.columns + records = [] + i = 0 + while i < n_rows + record = [] + j = 0 + while j < n_columns + record << columns[j][i] + j += 1 + end + records << record + i += 1 + end + records + end +benchmark: + pure_ruby: |- + pure_ruby_raw_records(record_batch) + raw_records: |- + record_batch.raw_records diff --git a/src/arrow/ruby/red-arrow/benchmark/values/boolean.yml b/src/arrow/ruby/red-arrow/benchmark/values/boolean.yml new file mode 100644 index 000000000..45abff523 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/boolean.yml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + values = n_values.times.map { Faker::Boolean.boolean } + array = Arrow::BooleanArray.new(values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/decimal128.yml b/src/arrow/ruby/red-arrow/benchmark/values/decimal128.yml new file mode 100644 index 000000000..4a2a5bff5 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/decimal128.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + type = Arrow::Decimal128DataType.new(10, 5) + values = n_values.times.map { Faker::Number.decimal(l_digits: 10, r_digits: 5) } + array = Arrow::Decimal128Array.new(type, values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/dictionary.yml b/src/arrow/ruby/red-arrow/benchmark/values/dictionary.yml new file mode 100644 index 000000000..5b4f20dc8 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/dictionary.yml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + type = Arrow::DictionaryDataType.new(:int8, :string, true) + + dictionary = Arrow::StringArray.new( + 100.times.map { Faker::Book.genre }.uniq.sort + ) + indices = Arrow::Int8Array.new( + n_values.times.map { + Faker::Number.within(range: 0 ... dictionary.length) + } + ) + array = Arrow::DictionaryArray.new(type, indices, dictionary) +benchmark: + pure_ruby: |- + array.length.times.collect {|i| array.indices[i]} + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/int64.yml b/src/arrow/ruby/red-arrow/benchmark/values/int64.yml new file mode 100644 index 000000000..d9e89261a --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/int64.yml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + values = n_values.times.map { Faker::Number.number(digits: 18).to_i } + array = Arrow::Int64Array.new(values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/list.yml b/src/arrow/ruby/red-arrow/benchmark/values/list.yml new file mode 100644 index 000000000..2764c1a61 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/list.yml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + type = Arrow::ListDataType.new(name: "values", type: :double) + + values = n_values.times.map do + n_elements = Faker::Number.within(range: 1 ... 
100) + n_elements.times.map do + Faker::Number.normal(mean: 0, standard_deviation: 1e+6) + end + end + array = Arrow::ListArray.new(type, values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/string.yml b/src/arrow/ruby/red-arrow/benchmark/values/string.yml new file mode 100644 index 000000000..8a40deaa0 --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/string.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + + values = n_values.times.map { Faker::Name.name } + array = Arrow::StringArray.new(values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/benchmark/values/timestamp.yml b/src/arrow/ruby/red-arrow/benchmark/values/timestamp.yml new file mode 100644 index 000000000..4af46d1db --- /dev/null +++ b/src/arrow/ruby/red-arrow/benchmark/values/timestamp.yml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +contexts: + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("ext/arrow")) + $LOAD_PATH.unshift(File.expand_path("lib")) +prelude: |- + require "arrow" + require "faker" + + state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i + Faker::Config.random = Random.new(state) + + n_values = 1000 + type = Arrow::TimestampDataType.new(:micro) + base_timestamp = Time.at(Faker::Number.within(range: 0 ... 
1_000_000_000)) + thirty_days_in_sec = 30*24*3600 + timestamp_range = { + from: base_timestamp - thirty_days_in_sec, + to: base_timestamp + thirty_days_in_sec, + } + + values = n_values.times.map do + sec = Faker::Time.between(timestamp_range).to_i + micro = Faker::Number.within(range: 0 ... 1_000_000) + sec * 1_000_000 + micro + end + array = Arrow::TimestampArray.new(type, values) +benchmark: + pure_ruby: |- + array.collect.to_a + values: |- + array.values diff --git a/src/arrow/ruby/red-arrow/doc/text/development.md b/src/arrow/ruby/red-arrow/doc/text/development.md new file mode 100644 index 000000000..cc86de35f --- /dev/null +++ b/src/arrow/ruby/red-arrow/doc/text/development.md @@ -0,0 +1,34 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Development + +## Naming convention + +### Reader and Writer + +Reader and Writer require an opened IO stream. + +### Loader and Saver + +Loader and Saver require a path. They are convenience classes. + +Loader opens the path and reads data with Reader. + +Saver opens the path and writes data with Writer. 
diff --git a/src/arrow/ruby/red-arrow/example/read-file.rb b/src/arrow/ruby/red-arrow/example/read-file.rb new file mode 100755 index 000000000..9a99d3377 --- /dev/null +++ b/src/arrow/ruby/red-arrow/example/read-file.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow" + +Arrow::MemoryMappedInputStream.open("/tmp/file.arrow") do |input| + reader = Arrow::RecordBatchFileReader.new(input) + fields = reader.schema.fields + reader.each_with_index do |record_batch, i| + puts("=" * 40) + puts("record-batch[#{i}]:") + fields.each do |field| + field_name = field.name + values = record_batch.collect do |record| + record[field_name] + end + puts(" #{field_name}: #{values.inspect}") + end + end +end diff --git a/src/arrow/ruby/red-arrow/example/read-stream.rb b/src/arrow/ruby/red-arrow/example/read-stream.rb new file mode 100755 index 000000000..c7197120c --- /dev/null +++ b/src/arrow/ruby/red-arrow/example/read-stream.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow" + +Arrow::MemoryMappedInputStream.open("/tmp/stream.arrow") do |input| + reader = Arrow::RecordBatchStreamReader.new(input) + fields = reader.schema.fields + reader.each_with_index do |record_batch, i| + puts("=" * 40) + puts("record-batch[#{i}]:") + fields.each do |field| + field_name = field.name + values = record_batch.collect do |record| + record[field_name] + end + puts(" #{field_name}: #{values.inspect}") + end + end +end diff --git a/src/arrow/ruby/red-arrow/example/write-file.rb b/src/arrow/ruby/red-arrow/example/write-file.rb new file mode 100755 index 000000000..c55ab2ef2 --- /dev/null +++ b/src/arrow/ruby/red-arrow/example/write-file.rb @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow" + +fields = [ + Arrow::Field.new("uint8", :uint8), + Arrow::Field.new("uint16", :uint16), + Arrow::Field.new("uint32", :uint32), + Arrow::Field.new("uint64", :uint64), + Arrow::Field.new("int8", :int8), + Arrow::Field.new("int16", :int16), + Arrow::Field.new("int32", :int32), + Arrow::Field.new("int64", :int64), + Arrow::Field.new("float", :float), + Arrow::Field.new("double", :double), +] +schema = Arrow::Schema.new(fields) + +Arrow::FileOutputStream.open("/tmp/file.arrow", false) do |output| + Arrow::RecordBatchFileWriter.open(output, schema) do |writer| + uints = [1, 2, 4, 8] + ints = [1, -2, 4, -8] + floats = [1.1, -2.2, 4.4, -8.8] + columns = [ + Arrow::UInt8Array.new(uints), + Arrow::UInt16Array.new(uints), + Arrow::UInt32Array.new(uints), + Arrow::UInt64Array.new(uints), + Arrow::Int8Array.new(ints), + Arrow::Int16Array.new(ints), + Arrow::Int32Array.new(ints), + Arrow::Int64Array.new(ints), + Arrow::FloatArray.new(floats), + Arrow::DoubleArray.new(floats), + ] + + record_batch = Arrow::RecordBatch.new(schema, 4, columns) + writer.write_record_batch(record_batch) + + sliced_columns = columns.collect do |column| + column.slice(1, 3) + end + record_batch = Arrow::RecordBatch.new(schema, 3, sliced_columns) + writer.write_record_batch(record_batch) + end +end diff --git a/src/arrow/ruby/red-arrow/example/write-stream.rb b/src/arrow/ruby/red-arrow/example/write-stream.rb new file mode 100755 index 000000000..fde486206 --- /dev/null +++ b/src/arrow/ruby/red-arrow/example/write-stream.rb @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow" + +fields = [ + Arrow::Field.new("uint8", :uint8), + Arrow::Field.new("uint16", :uint16), + Arrow::Field.new("uint32", :uint32), + Arrow::Field.new("uint64", :uint64), + Arrow::Field.new("int8", :int8), + Arrow::Field.new("int16", :int16), + Arrow::Field.new("int32", :int32), + Arrow::Field.new("int64", :int64), + Arrow::Field.new("float", :float), + Arrow::Field.new("double", :double), +] +schema = Arrow::Schema.new(fields) + +Arrow::FileOutputStream.open("/tmp/stream.arrow", false) do |output| + Arrow::RecordBatchStreamWriter.open(output, schema) do |writer| + uints = [1, 2, 4, 8] + ints = [1, -2, 4, -8] + floats = [1.1, -2.2, 4.4, -8.8] + columns = [ + Arrow::UInt8Array.new(uints), + Arrow::UInt16Array.new(uints), + Arrow::UInt32Array.new(uints), + Arrow::UInt64Array.new(uints), + Arrow::Int8Array.new(ints), + Arrow::Int16Array.new(ints), + Arrow::Int32Array.new(ints), + Arrow::Int64Array.new(ints), + Arrow::FloatArray.new(floats), + Arrow::DoubleArray.new(floats), + ] + + record_batch = Arrow::RecordBatch.new(schema, 4, columns) + writer.write_record_batch(record_batch) + + sliced_columns = columns.collect do |column| + column.slice(1, 3) + end + record_batch = Arrow::RecordBatch.new(schema, 3, sliced_columns) + writer.write_record_batch(record_batch) + end +end diff --git a/src/arrow/ruby/red-arrow/ext/arrow/arrow.cpp b/src/arrow/ruby/red-arrow/ext/arrow/arrow.cpp new file 
mode 100644 index 000000000..86c8c8fb6 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/arrow.cpp @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "red-arrow.hpp" +#include "memory-view.hpp" + +#include <ruby.hpp> + +namespace red_arrow { + VALUE cDate; + + VALUE cArrowTime; + + VALUE ArrowTimeUnitSECOND; + VALUE ArrowTimeUnitMILLI; + VALUE ArrowTimeUnitMICRO; + VALUE ArrowTimeUnitNANO; + + ID id_BigDecimal; + ID id_jd; + ID id_new; + ID id_to_datetime; +} + +extern "C" void Init_arrow() { + auto mArrow = rb_const_get_at(rb_cObject, rb_intern("Arrow")); + + auto cArrowArray = rb_const_get_at(mArrow, rb_intern("Array")); + rb_define_method(cArrowArray, "values", + reinterpret_cast<rb::RawMethod>(red_arrow::array_values), + 0); + + auto cArrowChunkedArray = rb_const_get_at(mArrow, rb_intern("ChunkedArray")); + rb_define_method(cArrowChunkedArray, "values", + reinterpret_cast<rb::RawMethod>(red_arrow::chunked_array_values), + 0); + + auto cArrowRecordBatch = rb_const_get_at(mArrow, rb_intern("RecordBatch")); + rb_define_method(cArrowRecordBatch, "raw_records", + reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records), + 0); + + auto cArrowTable = 
rb_const_get_at(mArrow, rb_intern("Table")); + rb_define_method(cArrowTable, "raw_records", + reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records), + 0); + + red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date")); + + red_arrow::cArrowTime = rb_const_get_at(mArrow, rb_intern("Time")); + + auto cArrowTimeUnit = rb_const_get_at(mArrow, rb_intern("TimeUnit")); + red_arrow::ArrowTimeUnitSECOND = + rb_const_get_at(cArrowTimeUnit, rb_intern("SECOND")); + red_arrow::ArrowTimeUnitMILLI = + rb_const_get_at(cArrowTimeUnit, rb_intern("MILLI")); + red_arrow::ArrowTimeUnitMICRO = + rb_const_get_at(cArrowTimeUnit, rb_intern("MICRO")); + red_arrow::ArrowTimeUnitNANO = + rb_const_get_at(cArrowTimeUnit, rb_intern("NANO")); + + red_arrow::id_BigDecimal = rb_intern("BigDecimal"); + red_arrow::id_jd = rb_intern("jd"); + red_arrow::id_new = rb_intern("new"); + red_arrow::id_to_datetime = rb_intern("to_datetime"); + + red_arrow::memory_view::init(mArrow); +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/converters.cpp b/src/arrow/ruby/red-arrow/ext/arrow/converters.cpp new file mode 100644 index 000000000..f3bfa6f34 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/converters.cpp @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "converters.hpp" + +namespace red_arrow { + VALUE ArrayValueConverter::convert(const arrow::ListArray& array, + const int64_t i) { + return list_array_value_converter_->convert(array, i); + } + + VALUE ArrayValueConverter::convert(const arrow::StructArray& array, + const int64_t i) { + return struct_array_value_converter_->convert(array, i); + } + + VALUE ArrayValueConverter::convert(const arrow::MapArray& array, + const int64_t i) { + return map_array_value_converter_->convert(array, i); + } + + VALUE ArrayValueConverter::convert(const arrow::UnionArray& array, + const int64_t i) { + return union_array_value_converter_->convert(array, i); + } + + VALUE ArrayValueConverter::convert(const arrow::DictionaryArray& array, + const int64_t i) { + return dictionary_array_value_converter_->convert(array, i); + } +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/converters.hpp b/src/arrow/ruby/red-arrow/ext/arrow/converters.hpp new file mode 100644 index 000000000..f7532f951 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/converters.hpp @@ -0,0 +1,795 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "red-arrow.hpp" + +#include <ruby.hpp> +#include <ruby/encoding.h> + +#include <arrow-glib/error.hpp> + +#include <arrow/util/logging.h> + +namespace red_arrow { + class ListArrayValueConverter; + class StructArrayValueConverter; + class MapArrayValueConverter; + class UnionArrayValueConverter; + class DictionaryArrayValueConverter; + + class ArrayValueConverter { + public: + ArrayValueConverter() + : decimal_buffer_(), + list_array_value_converter_(nullptr), + struct_array_value_converter_(nullptr), + map_array_value_converter_(nullptr), + union_array_value_converter_(nullptr), + dictionary_array_value_converter_(nullptr) { + } + + inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter, + StructArrayValueConverter* struct_array_value_converter, + MapArrayValueConverter* map_array_value_converter, + UnionArrayValueConverter* union_array_value_converter, + DictionaryArrayValueConverter* dictionary_array_value_converter) { + list_array_value_converter_ = list_array_value_converter; + struct_array_value_converter_ = struct_array_value_converter; + map_array_value_converter_ = map_array_value_converter; + union_array_value_converter_ = union_array_value_converter; + dictionary_array_value_converter_ = dictionary_array_value_converter; + } + + inline VALUE convert(const arrow::NullArray& array, + const int64_t i) { + return Qnil; + } + + inline VALUE convert(const arrow::BooleanArray& array, + const int64_t i) { + return array.Value(i) ? 
Qtrue : Qfalse; + } + + inline VALUE convert(const arrow::Int8Array& array, + const int64_t i) { + return INT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::Int16Array& array, + const int64_t i) { + return INT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::Int32Array& array, + const int64_t i) { + return INT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::Int64Array& array, + const int64_t i) { + return LL2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::UInt8Array& array, + const int64_t i) { + return UINT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::UInt16Array& array, + const int64_t i) { + return UINT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::UInt32Array& array, + const int64_t i) { + return UINT2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::UInt64Array& array, + const int64_t i) { + return ULL2NUM(array.Value(i)); + } + + // TODO + // inline VALUE convert(const arrow::HalfFloatArray& array, + // const int64_t i) { + // } + + inline VALUE convert(const arrow::FloatArray& array, + const int64_t i) { + return DBL2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::DoubleArray& array, + const int64_t i) { + return DBL2NUM(array.Value(i)); + } + + inline VALUE convert(const arrow::BinaryArray& array, + const int64_t i) { + int32_t length; + const auto value = array.GetValue(i, &length); + // TODO: encoding support + return rb_enc_str_new(reinterpret_cast<const char*>(value), + length, + rb_ascii8bit_encoding()); + } + + inline VALUE convert(const arrow::StringArray& array, + const int64_t i) { + int32_t length; + const auto value = array.GetValue(i, &length); + return rb_utf8_str_new(reinterpret_cast<const char*>(value), + length); + } + + inline VALUE convert(const arrow::FixedSizeBinaryArray& array, + const int64_t i) { + return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)), + array.byte_width(), + 
rb_ascii8bit_encoding()); + } + + constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588; + inline VALUE convert(const arrow::Date32Array& array, + const int64_t i) { + const auto value = array.Value(i); + const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH; + return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian)); + } + + inline VALUE convert(const arrow::Date64Array& array, + const int64_t i) { + const auto value = array.Value(i); + auto msec = LL2NUM(value); + auto sec = rb_rational_new(msec, INT2NUM(1000)); + auto time_value = rb_time_num_new(sec, Qnil); + return rb_funcall(time_value, id_to_datetime, 0, 0); + } + + inline VALUE convert(const arrow::Time32Array& array, + const int64_t i) { + const auto type = + arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get()); + const auto value = array.Value(i); + return rb_funcall(red_arrow::cArrowTime, + id_new, + 2, + time_unit_to_enum(type->unit()), + INT2NUM(value)); + } + + inline VALUE convert(const arrow::Time64Array& array, + const int64_t i) { + const auto type = + arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get()); + const auto value = array.Value(i); + return rb_funcall(red_arrow::cArrowTime, + id_new, + 2, + time_unit_to_enum(type->unit()), + LL2NUM(value)); + } + + inline VALUE convert(const arrow::TimestampArray& array, + const int64_t i) { + const auto type = + arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get()); + auto scale = time_unit_to_scale(type->unit()); + auto value = array.Value(i); + auto sec = rb_rational_new(LL2NUM(value), scale); + return rb_time_num_new(sec, Qnil); + } + + // TODO + // inline VALUE convert(const arrow::IntervalArray& array, + // const int64_t i) { + // }; + + VALUE convert(const arrow::ListArray& array, + const int64_t i); + + VALUE convert(const arrow::StructArray& array, + const int64_t i); + + VALUE convert(const arrow::MapArray& array, + const int64_t i); + + VALUE convert(const 
arrow::UnionArray& array, + const int64_t i); + + VALUE convert(const arrow::DictionaryArray& array, + const int64_t i); + + inline VALUE convert(const arrow::Decimal128Array& array, + const int64_t i) { + return convert_decimal(std::move(array.FormatValue(i))); + } + + inline VALUE convert(const arrow::Decimal256Array& array, + const int64_t i) { + return convert_decimal(std::move(array.FormatValue(i))); + } + + private: + inline VALUE convert_decimal(std::string&& value) { + decimal_buffer_ = value; + return rb_funcall(rb_cObject, + id_BigDecimal, + 1, + rb_enc_str_new(decimal_buffer_.data(), + decimal_buffer_.length(), + rb_ascii8bit_encoding())); + } + + std::string decimal_buffer_; + ListArrayValueConverter* list_array_value_converter_; + StructArrayValueConverter* struct_array_value_converter_; + MapArrayValueConverter* map_array_value_converter_; + UnionArrayValueConverter* union_array_value_converter_; + DictionaryArrayValueConverter* dictionary_array_value_converter_; + }; + + class ListArrayValueConverter : public arrow::ArrayVisitor { + public: + explicit ListArrayValueConverter(ArrayValueConverter* converter) + : array_value_converter_(converter), + offset_(0), + length_(0), + result_(Qnil) {} + + VALUE convert(const arrow::ListArray& array, const int64_t index) { + auto values = array.values().get(); + auto offset_keep = offset_; + auto length_keep = length_; + offset_ = array.value_offset(index); + length_ = array.value_length(index); + auto result_keep = result_; + result_ = rb_ary_new_capa(length_); + check_status(values->Accept(this), + "[raw-records][list-array]"); + offset_ = offset_keep; + length_ = length_keep; + auto result_return = result_; + result_ = result_keep; + return result_return; + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + return visit_value(array); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + 
VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + inline VALUE convert_value(const ArrayType& array, + const int64_t i) { + return array_value_converter_->convert(array, i); + } + + template <typename ArrayType> + arrow::Status visit_value(const ArrayType& array) { + if (array.null_count() > 0) { + for (int64_t i = 0; i < length_; ++i) { + auto value = Qnil; + if (!array.IsNull(i + offset_)) { + value = convert_value(array, i + offset_); + } + rb_ary_push(result_, value); + } + } else { + for (int64_t i = 0; i < length_; ++i) { + rb_ary_push(result_, convert_value(array, i + offset_)); + } + } + return arrow::Status::OK(); + } + + ArrayValueConverter* array_value_converter_; + int32_t offset_; + int32_t length_; + VALUE result_; + }; + + class StructArrayValueConverter : public arrow::ArrayVisitor { + public: + explicit StructArrayValueConverter(ArrayValueConverter* converter) + : array_value_converter_(converter), + key_(Qnil), + index_(0), + result_(Qnil) {} + + VALUE convert(const arrow::StructArray& array, + const int64_t index) { + auto index_keep = index_; + auto result_keep = result_; + index_ = index; + result_ = rb_hash_new(); + const auto struct_type = array.struct_type(); + const auto n = struct_type->num_fields(); + for (int i = 0; i < n; ++i) { + const auto field_type = struct_type->field(i).get(); + const auto& field_name = field_type->name(); + auto key_keep = key_; + key_ = rb_utf8_str_new(field_name.data(), field_name.length()); + const auto field_array = array.field(i).get(); + 
check_status(field_array->Accept(this), + "[raw-records][struct-array]"); + key_ = key_keep; + } + auto result_return = result_; + result_ = result_keep; + index_ = index_keep; + return result_return; + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + fill_field(array); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + inline VALUE convert_value(const ArrayType& array, + const int64_t i) { + return array_value_converter_->convert(array, i); + } + + template <typename ArrayType> + void fill_field(const ArrayType& array) { + if (array.IsNull(index_)) { + rb_hash_aset(result_, key_, Qnil); + } else { + rb_hash_aset(result_, key_, convert_value(array, index_)); + } + } + + ArrayValueConverter* array_value_converter_; + VALUE key_; + int64_t index_; + VALUE result_; + }; + + class MapArrayValueConverter : public arrow::ArrayVisitor { + public: + explicit MapArrayValueConverter(ArrayValueConverter* converter) + : array_value_converter_(converter), + offset_(0), + length_(0), + values_(Qnil) {} + + VALUE convert(const arrow::MapArray& array, + const int64_t index) { + auto key_array = array.keys().get(); + auto item_array = array.items().get(); + auto offset_keep = offset_; + auto length_keep = length_; + auto values_keep = values_; + offset_ = array.value_offset(index); + length_ = array.value_length(index); + auto 
keys = rb_ary_new_capa(length_); + values_ = keys; + check_status(key_array->Accept(this), + "[raw-records][map-array][keys]"); + auto items = rb_ary_new_capa(length_); + values_ = items; + check_status(item_array->Accept(this), + "[raw-records][map-array][items]"); + auto map = rb_hash_new(); + auto n = RARRAY_LEN(keys); + auto raw_keys = RARRAY_CONST_PTR(keys); + auto raw_items = RARRAY_CONST_PTR(items); + for (long i = 0; i < n; ++i) { + rb_hash_aset(map, raw_keys[i], raw_items[i]); + } + offset_ = offset_keep; + length_ = length_keep; + values_ = values_keep; + return map; + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + return visit_value(array); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + inline VALUE convert_value(const ArrayType& array, + const int64_t i) { + return array_value_converter_->convert(array, i); + } + + template <typename ArrayType> + arrow::Status visit_value(const ArrayType& array) { + if (array.null_count() > 0) { + for (int64_t i = 0; i < length_; ++i) { + auto value = Qnil; + if (!array.IsNull(i + offset_)) { + value = convert_value(array, i + offset_); + } + rb_ary_push(values_, value); + } + } else { + for (int64_t i = 0; i < length_; ++i) { + rb_ary_push(values_, convert_value(array, i + offset_)); + } + } + return arrow::Status::OK(); + } + + ArrayValueConverter* array_value_converter_; + int32_t offset_; 
+ int32_t length_; + VALUE values_; + }; + + class UnionArrayValueConverter : public arrow::ArrayVisitor { + public: + explicit UnionArrayValueConverter(ArrayValueConverter* converter) + : array_value_converter_(converter), + index_(0), + result_(Qnil) {} + + VALUE convert(const arrow::UnionArray& array, + const int64_t index) { + const auto index_keep = index_; + const auto result_keep = result_; + index_ = index; + switch (array.mode()) { + case arrow::UnionMode::SPARSE: + convert_sparse(static_cast<const arrow::SparseUnionArray&>(array)); + break; + case arrow::UnionMode::DENSE: + convert_dense(static_cast<const arrow::DenseUnionArray&>(array)); + break; + default: + rb_raise(rb_eArgError, "Invalid union mode"); + break; + } + auto result_return = result_; + index_ = index_keep; + result_ = result_keep; + return result_return; + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + convert_value(array); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + inline void convert_value(const ArrayType& array) { + auto result = rb_hash_new(); + if (array.IsNull(index_)) { + rb_hash_aset(result, field_name_, Qnil); + } else { + rb_hash_aset(result, + field_name_, + array_value_converter_->convert(array, index_)); + } + result_ = result; + } + + uint8_t compute_field_index(const arrow::UnionArray& array, + arrow::UnionType* 
type, + const char* tag) { + const auto type_code = array.raw_type_codes()[index_]; + if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) { + const auto field_id = type->child_ids()[type_code]; + if (field_id >= 0) { + return field_id; + } + } + check_status(arrow::Status::Invalid("Unknown type ID: ", type_code), + tag); + return 0; + } + + void convert_sparse(const arrow::SparseUnionArray& array) { + const auto type = + std::static_pointer_cast<arrow::UnionType>(array.type()).get(); + const auto tag = "[raw-records][union-sparse-array]"; + const auto index = compute_field_index(array, type, tag); + const auto field = type->field(index).get(); + const auto& field_name = field->name(); + const auto field_name_keep = field_name_; + field_name_ = rb_utf8_str_new(field_name.data(), field_name.length()); + const auto field_array = array.field(index).get(); + check_status(field_array->Accept(this), tag); + field_name_ = field_name_keep; + } + + void convert_dense(const arrow::DenseUnionArray& array) { + const auto type = + std::static_pointer_cast<arrow::UnionType>(array.type()).get(); + const auto tag = "[raw-records][union-dense-array]"; + const auto index = compute_field_index(array, type, tag); + const auto field = type->field(index).get(); + const auto& field_name = field->name(); + const auto field_name_keep = field_name_; + field_name_ = rb_utf8_str_new(field_name.data(), field_name.length()); + const auto field_array = array.field(index); + const auto index_keep = index_; + index_ = array.value_offset(index_); + check_status(field_array->Accept(this), tag); + index_ = index_keep; + field_name_ = field_name_keep; + } + + ArrayValueConverter* array_value_converter_; + int64_t index_; + VALUE field_name_; + VALUE result_; + }; + + class DictionaryArrayValueConverter : public arrow::ArrayVisitor { + public: + explicit DictionaryArrayValueConverter(ArrayValueConverter* converter) + : array_value_converter_(converter), + value_index_(0), + result_(Qnil) 
{ + } + + VALUE convert(const arrow::DictionaryArray& array, + const int64_t index) { + value_index_ = array.GetValueIndex(index); + auto dictionary = array.dictionary().get(); + check_status(dictionary->Accept(this), + "[raw-records][dictionary-array]"); + return result_; + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + result_ = convert_value(array, value_index_); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + inline VALUE convert_value(const ArrayType& array, + const int64_t i) { + return array_value_converter_->convert(array, i); + } + + ArrayValueConverter* array_value_converter_; + int64_t value_index_; + VALUE result_; + }; + + class Converter { + public: + explicit Converter() + : array_value_converter_(), + list_array_value_converter_(&array_value_converter_), + struct_array_value_converter_(&array_value_converter_), + map_array_value_converter_(&array_value_converter_), + union_array_value_converter_(&array_value_converter_), + dictionary_array_value_converter_(&array_value_converter_) { + array_value_converter_. 
+ set_sub_value_converters(&list_array_value_converter_, + &struct_array_value_converter_, + &map_array_value_converter_, + &union_array_value_converter_, + &dictionary_array_value_converter_); + } + + template <typename ArrayType> + inline VALUE convert_value(const ArrayType& array, + const int64_t i) { + return array_value_converter_.convert(array, i); + } + + ArrayValueConverter array_value_converter_; + ListArrayValueConverter list_array_value_converter_; + StructArrayValueConverter struct_array_value_converter_; + MapArrayValueConverter map_array_value_converter_; + UnionArrayValueConverter union_array_value_converter_; + DictionaryArrayValueConverter dictionary_array_value_converter_; + }; +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/extconf.rb b/src/arrow/ruby/red-arrow/ext/arrow/extconf.rb new file mode 100644 index 000000000..9e92bd316 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/extconf.rb @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require "extpp" +require "mkmf-gnome" +require_relative "../../lib/arrow/version" + +arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"] +if arrow_pkg_config_path + pkg_config_paths = [arrow_pkg_config_path, ENV["PKG_CONFIG_PATH"]].compact + ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR) +end + +checking_for(checking_message("Homebrew")) do + platform = NativePackageInstaller::Platform.detect + if platform.is_a?(NativePackageInstaller::Platform::Homebrew) + openssl_prefix = `brew --prefix openssl@1.1`.chomp + unless openssl_prefix.empty? + PKGConfig.add_path("#{openssl_prefix}/lib/pkgconfig") + end + true + else + false + end +end + +unless required_pkg_config_package([ + "arrow", + Arrow::Version::MAJOR, + Arrow::Version::MINOR, + Arrow::Version::MICRO, + ], + debian: "libarrow-dev", + redhat: "arrow-devel", + homebrew: "apache-arrow", + msys2: "arrow") + exit(false) +end + +unless required_pkg_config_package([ + "arrow-glib", + Arrow::Version::MAJOR, + Arrow::Version::MINOR, + Arrow::Version::MICRO, + ], + debian: "libarrow-glib-dev", + redhat: "arrow-glib-devel", + homebrew: "apache-arrow-glib", + msys2: "arrow") + exit(false) +end + +[ + ["glib2", "ext/glib2"], +].each do |name, relative_source_dir| + spec = find_gem_spec(name) + source_dir = File.join(spec.full_gem_path, relative_source_dir) + build_dir = source_dir + add_depend_package_path(name, source_dir, build_dir) +end + +create_makefile("arrow") diff --git a/src/arrow/ruby/red-arrow/ext/arrow/memory-view.cpp b/src/arrow/ruby/red-arrow/ext/arrow/memory-view.cpp new file mode 100644 index 000000000..a3135310c --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/memory-view.cpp @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "memory-view.hpp" + +#include <arrow-glib/arrow-glib.hpp> +#include <rbgobject.h> + +#include <ruby/version.h> + +#if RUBY_API_VERSION_MAJOR >= 3 +# define HAVE_MEMORY_VIEW +# define private memory_view_private +# include <ruby/memory_view.h> +# undef private +#endif + +#include <sstream> + +namespace red_arrow { + namespace memory_view { +#ifdef HAVE_MEMORY_VIEW + // This is workaround for the following rb_memory_view_t problems + // in C++: + // + // * Can't use "private" as member name + // * Can't assign a value to "rb_memory_view_t::private" + // + // This has compatible layout with rb_memory_view_t. + struct memory_view { + VALUE obj; + void *data; + ssize_t byte_size; + bool readonly; + const char *format; + ssize_t item_size; + struct { + const rb_memory_view_item_component_t *components; + size_t length; + } item_desc; + ssize_t ndim; + const ssize_t *shape; + const ssize_t *strides; + const ssize_t *sub_offsets; + void *private_data; + }; + + struct PrivateData { + std::string format; + }; + + class PrimitiveArrayGetter : public arrow::ArrayVisitor { + public: + explicit PrimitiveArrayGetter(memory_view *view) + : view_(view) { + } + + arrow::Status Visit(const arrow::BooleanArray& array) override { + fill(static_cast<const arrow::Array&>(array)); + // Memory view doesn't support bit stream. We use one byte + // for 8 elements. 
Users can't calculate the number of + // elements from memory view but it's limitation of memory view. +#ifdef ARROW_LITTLE_ENDIAN + view_->format = "b8"; +#else + view_->format = "B8"; +#endif + view_->item_size = 1; + view_->byte_size = (array.length() + 7) / 8; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int8Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "c"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int16Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "s"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int32Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "l"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int64Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::UInt8Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "C"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::UInt16Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "S"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::UInt32Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "L"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::UInt64Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "Q"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::FloatArray& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "f"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::DoubleArray& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "d"; + 
return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::FixedSizeBinaryArray& array) override { + fill(static_cast<const arrow::Array&>(array)); + auto priv = static_cast<PrivateData *>(view_->private_data); + const auto type = + std::static_pointer_cast<const arrow::FixedSizeBinaryType>( + array.type()); + std::ostringstream output; + output << "C" << type->byte_width(); + priv->format = output.str(); + view_->format = priv->format.c_str(); + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Date32Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "l"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Date64Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Time32Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "l"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Time64Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::TimestampArray& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Decimal128Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q2"; + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Decimal256Array& array) override { + fill(static_cast<const arrow::Array&>(array)); + view_->format = "q4"; + return arrow::Status::OK(); + } + + private: + void fill(const arrow::Array& array) { + const auto array_data = array.data(); + const auto data = array_data->GetValuesSafe<uint8_t>(1); + view_->data = const_cast<void *>(reinterpret_cast<const void *>(data)); + const auto type = + 
std::static_pointer_cast<const arrow::FixedWidthType>(array.type()); + view_->item_size = type->bit_width() / 8; + view_->byte_size = view_->item_size * array.length(); + } + + memory_view *view_; + }; + + // Memory view "get" callback for Arrow::PrimitiveArray. + // Fills `view` from the wrapped Arrow array and returns true on success. + // Returns false when `flags` is not RUBY_MEMORY_VIEW_SIMPLE or when + // PrimitiveArrayGetter reports a non-OK status for the array. + bool primitive_array_get(VALUE obj, rb_memory_view_t *view, int flags) { + if (flags != RUBY_MEMORY_VIEW_SIMPLE) { + return false; + } + auto view_ = reinterpret_cast<memory_view *>(view); + view_->obj = obj; + view_->private_data = new PrivateData(); + auto array = GARROW_ARRAY(RVAL2GOBJ(obj)); + auto arrow_array = garrow_array_get_raw(array); + PrimitiveArrayGetter getter(view_); + auto status = arrow_array->Accept(&getter); + if (!status.ok()) { + // The view was not exported, so primitive_array_release() is not + // expected to run for it; free the private data here to avoid a leak. + delete static_cast<PrivateData *>(view_->private_data); + view_->private_data = NULL; + return false; + } + view_->readonly = true; + view_->ndim = 1; + view_->shape = NULL; + view_->strides = NULL; + view_->sub_offsets = NULL; + return true; + } + + bool primitive_array_release(VALUE obj, rb_memory_view_t *view) { + auto view_ = reinterpret_cast<memory_view *>(view); + delete static_cast<PrivateData *>(view_->private_data); + return true; + } + + bool primitive_array_available_p(VALUE obj) { + return true; + } + + rb_memory_view_entry_t primitive_array_entry = { + primitive_array_get, + primitive_array_release, + primitive_array_available_p, + }; + + bool buffer_get(VALUE obj, rb_memory_view_t *view, int flags) { + if (flags != RUBY_MEMORY_VIEW_SIMPLE) { + return false; + } + auto view_ = reinterpret_cast<memory_view *>(view); + view_->obj = obj; + auto buffer = GARROW_BUFFER(RVAL2GOBJ(obj)); + auto arrow_buffer = garrow_buffer_get_raw(buffer); + view_->data = + const_cast<void *>(reinterpret_cast<const void *>(arrow_buffer->data())); + // Memory view doesn't support bit stream. We use one byte + // for 8 elements. Users can't calculate the number of + // elements from memory view but it's limitation of memory view.
+#ifdef ARROW_LITTLE_ENDIAN + view_->format = "b8"; +#else + view_->format = "B8"; +#endif + view_->item_size = 1; + view_->byte_size = arrow_buffer->size(); + view_->readonly = true; + view_->ndim = 1; + view_->shape = NULL; + view_->strides = NULL; + view_->sub_offsets = NULL; + return true; + } + + bool buffer_release(VALUE obj, rb_memory_view_t *view) { + return true; + } + + bool buffer_available_p(VALUE obj) { + return true; + } + + rb_memory_view_entry_t buffer_entry = { + buffer_get, + buffer_release, + buffer_available_p, + }; +#endif + + void init(VALUE mArrow) { +#ifdef HAVE_MEMORY_VIEW + auto cPrimitiveArray = + rb_const_get_at(mArrow, rb_intern("PrimitiveArray")); + rb_memory_view_register(cPrimitiveArray, + &(red_arrow::memory_view::primitive_array_entry)); + + auto cBuffer = rb_const_get_at(mArrow, rb_intern("Buffer")); + rb_memory_view_register(cBuffer, &(red_arrow::memory_view::buffer_entry)); +#endif + } + } +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/memory-view.hpp b/src/arrow/ruby/red-arrow/ext/arrow/memory-view.hpp new file mode 100644 index 000000000..7a7764622 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/memory-view.hpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <ruby.hpp> + +namespace red_arrow { + namespace memory_view { + void init(VALUE mArrow); + } +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/raw-records.cpp b/src/arrow/ruby/red-arrow/ext/arrow/raw-records.cpp new file mode 100644 index 000000000..16261b895 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/raw-records.cpp @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "converters.hpp" + +namespace red_arrow { + namespace { + class RawRecordsBuilder : private Converter, public arrow::ArrayVisitor { + public: + explicit RawRecordsBuilder(VALUE records, int n_columns) + : Converter(), + records_(records), + n_columns_(n_columns) { + } + + void build(const arrow::RecordBatch& record_batch) { + rb::protect([&] { + const auto n_rows = record_batch.num_rows(); + for (int64_t i = 0; i < n_rows; ++i) { + auto record = rb_ary_new_capa(n_columns_); + rb_ary_push(records_, record); + } + row_offset_ = 0; + for (int i = 0; i < n_columns_; ++i) { + const auto array = record_batch.column(i).get(); + column_index_ = i; + check_status(array->Accept(this), + "[record-batch][raw-records]"); + } + return Qnil; + }); + } + + void build(const arrow::Table& table) { + rb::protect([&] { + const auto n_rows = table.num_rows(); + for (int64_t i = 0; i < n_rows; ++i) { + auto record = rb_ary_new_capa(n_columns_); + rb_ary_push(records_, record); + } + for (int i = 0; i < n_columns_; ++i) { + const auto& chunked_array = table.column(i).get(); + column_index_ = i; + row_offset_ = 0; + for (const auto array : chunked_array->chunks()) { + check_status(array->Accept(this), + "[table][raw-records]"); + row_offset_ += array->length(); + } + } + return Qnil; + }); + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + convert(array); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // 
TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + void convert(const ArrayType& array) { + const auto n = array.length(); + if (array.null_count() > 0) { + for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) { + auto value = Qnil; + if (!array.IsNull(i)) { + value = convert_value(array, i); + } + auto record = rb_ary_entry(records_, ii); + rb_ary_store(record, column_index_, value); + } + } else { + for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) { + auto record = rb_ary_entry(records_, ii); + rb_ary_store(record, column_index_, convert_value(array, i)); + } + } + } + + // Destination for converted records. + VALUE records_; + + // The current column index. + int column_index_; + + // The current row offset. + int64_t row_offset_; + + // The number of columns. + const int n_columns_; + }; + } + + VALUE + record_batch_raw_records(VALUE rb_record_batch) { + auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch)); + auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get(); + const auto n_rows = record_batch->num_rows(); + const auto n_columns = record_batch->num_columns(); + auto records = rb_ary_new_capa(n_rows); + + try { + RawRecordsBuilder builder(records, n_columns); + builder.build(*record_batch); + } catch (rb::State& state) { + state.jump(); + } + + return records; + } + + VALUE + table_raw_records(VALUE rb_table) { + auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table)); + auto table = garrow_table_get_raw(garrow_table).get(); + const auto n_rows = table->num_rows(); + const auto n_columns = table->num_columns(); + auto records = rb_ary_new_capa(n_rows); + + try { + RawRecordsBuilder builder(records, n_columns); + builder.build(*table); + } catch (rb::State& state) { + state.jump(); + } + + return records; + } +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/red-arrow.hpp b/src/arrow/ruby/red-arrow/ext/arrow/red-arrow.hpp new file mode 100644 index 000000000..c3301dc7b 
--- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/red-arrow.hpp @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow/api.h> + +#ifdef _WIN32 +# define gmtime_r gmtime_r_ruby_win32 +# define localtime_r localtime_r_ruby_win32 +# include <ruby.h> +# undef gmtime_r +# undef localtime_r +#endif + +#include <arrow-glib/arrow-glib.hpp> +#include <rbgobject.h> + +namespace red_arrow { + extern VALUE cDate; + + extern VALUE cArrowTime; + + extern VALUE ArrowTimeUnitSECOND; + extern VALUE ArrowTimeUnitMILLI; + extern VALUE ArrowTimeUnitMICRO; + extern VALUE ArrowTimeUnitNANO; + + extern ID id_BigDecimal; + extern ID id_jd; + extern ID id_new; + extern ID id_to_datetime; + + VALUE array_values(VALUE obj); + VALUE chunked_array_values(VALUE obj); + + VALUE record_batch_raw_records(VALUE obj); + VALUE table_raw_records(VALUE obj); + + inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::SECOND: + return INT2FIX(1); + case arrow::TimeUnit::MILLI: + return INT2FIX(1000); + case arrow::TimeUnit::MICRO: + return INT2FIX(1000 * 1000); + case arrow::TimeUnit::NANO: + // NOTE: INT2FIX works for 1e+9 because: FIXNUM_MAX 
>= (1<<30) - 1 > 1e+9 + return INT2FIX(1000 * 1000 * 1000); + default: + rb_raise(rb_eArgError, "invalid arrow::TimeUnit: %d", unit); + return Qnil; + } + } + + inline VALUE time_unit_to_enum(const arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::SECOND: + return red_arrow::ArrowTimeUnitSECOND; + case arrow::TimeUnit::MILLI: + return red_arrow::ArrowTimeUnitMILLI; + case arrow::TimeUnit::MICRO: + return red_arrow::ArrowTimeUnitMICRO; + case arrow::TimeUnit::NANO: + return red_arrow::ArrowTimeUnitNANO; + default: + rb_raise(rb_eArgError, "invalid arrow::TimeUnit: %d", unit); + return Qnil; + } + } + + inline void check_status(const arrow::Status&& status, const char* context) { + GError* error = nullptr; + if (!garrow_error_check(&error, status, context)) { + RG_RAISE_ERROR(error); + } + } +} diff --git a/src/arrow/ruby/red-arrow/ext/arrow/values.cpp b/src/arrow/ruby/red-arrow/ext/arrow/values.cpp new file mode 100644 index 000000000..a8a5775b9 --- /dev/null +++ b/src/arrow/ruby/red-arrow/ext/arrow/values.cpp @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "converters.hpp" + +namespace red_arrow { + namespace { + class ValuesBuilder : private Converter, public arrow::ArrayVisitor { + public: + explicit ValuesBuilder(VALUE values) + : Converter(), + values_(values), + row_offset_(0) { + } + + void build(const arrow::Array& array, VALUE rb_array) { + rb::protect([&] { + check_status(array.Accept(this), + "[array][values]"); + return Qnil; + }); + } + + void build(const arrow::ChunkedArray& chunked_array, + VALUE rb_chunked_array) { + rb::protect([&] { + for (const auto& array : chunked_array.chunks()) { + check_status(array->Accept(this), + "[chunked-array][values]"); + row_offset_ += array->length(); + } + return Qnil; + }); + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + convert(array); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + // TODO + // VISIT(HalfFloat) + VISIT(Float) + VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + // TODO + // VISIT(Interval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + void convert(const ArrayType& array) { + const auto n = array.length(); + if (array.null_count() > 0) { + for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) { + auto value = Qnil; + if (!array.IsNull(i)) { + value = convert_value(array, i); + } + rb_ary_store(values_, ii, value); + } + } else { + for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) { + rb_ary_store(values_, ii, convert_value(array, i)); + } + } + } + + // Destination for converted values. + VALUE values_; + + // The current row offset. 
+ int64_t row_offset_; + }; + } + + VALUE + array_values(VALUE rb_array) { + auto garrow_array = GARROW_ARRAY(RVAL2GOBJ(rb_array)); + auto array = garrow_array_get_raw(garrow_array).get(); + const auto n_rows = array->length(); + auto values = rb_ary_new_capa(n_rows); + + try { + ValuesBuilder builder(values); + builder.build(*array, rb_array); + } catch (rb::State& state) { + state.jump(); + } + + return values; + } + + VALUE + chunked_array_values(VALUE rb_chunked_array) { + auto garrow_chunked_array = + GARROW_CHUNKED_ARRAY(RVAL2GOBJ(rb_chunked_array)); + auto chunked_array = + garrow_chunked_array_get_raw(garrow_chunked_array).get(); + const auto n_rows = chunked_array->length(); + auto values = rb_ary_new_capa(n_rows); + + try { + ValuesBuilder builder(values); + builder.build(*chunked_array, rb_chunked_array); + } catch (rb::State& state) { + state.jump(); + } + + return values; + } +} diff --git a/src/arrow/ruby/red-arrow/image/red-arrow.png b/src/arrow/ruby/red-arrow/image/red-arrow.png Binary files differnew file mode 100644 index 000000000..6db9b4b7a --- /dev/null +++ b/src/arrow/ruby/red-arrow/image/red-arrow.png diff --git a/src/arrow/ruby/red-arrow/lib/arrow.rb b/src/arrow/ruby/red-arrow/lib/arrow.rb new file mode 100644 index 000000000..8fbc537bc --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow.rb @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require "extpp/setup"
require "gio2"

require "arrow/version"

require "arrow/loader"

module Arrow
  class Error < StandardError
  end

  Loader.load
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb
# new file mode 100644
# index 000000000..f3a6ace58
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb
# @@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class AggregateNodeOptions
    class << self
      # Builds options from a Hash that has an `:aggregations` entry
      # (and optionally `:keys`).
      #
      # @param value [Object] The conversion candidate.
      # @return [AggregateNodeOptions, nil] `nil` when `value` isn't a
      #   Hash or has no `:aggregations`.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        aggregations = value[:aggregations]
        return nil if aggregations.nil?

        new(aggregations, value[:keys])
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb
# new file mode 100644
# index 000000000..9aac8239d
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb
# @@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Aggregation
    class << self
      # Builds an aggregation from a Hash with `:function` and `:input`
      # (plus optional `:options` and `:output`).
      #
      # The function name gets a `hash_` prefix when it doesn't have one.
      # A missing `:output` defaults to `"<function without hash_>(<input>)"`.
      #
      # @param value [Object] The conversion candidate.
      # @return [Aggregation, nil] `nil` when `value` isn't a Hash, when
      #   `:function` is missing or is neither a String nor a Symbol, or
      #   when `:input` is missing.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        name = value[:function]
        return nil if name.nil?
        name = name.to_s if name.is_a?(Symbol)
        return nil unless name.is_a?(String)
        # TODO: Improve this when we have non hash based aggregate function
        name = "hash_#{name}" unless name.start_with?("hash_")

        options = value[:options]
        input = value[:input]
        return nil if input.nil?

        output = value[:output]
        output = "#{name.sub(/\Ahash_/, "")}(#{input})" if output.nil?
        new(name, options, input, output)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb
# new file mode 100644
# index 000000000..651aed962
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb
# @@ -0,0 +1,214 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
+ +require "date" + +module Arrow + class ArrayBuilder + class << self + def build(values) + if self != ArrayBuilder + builder = new + return builder.build(values) + end + + builder_info = nil + values.each do |value| + builder_info = detect_builder_info(value, builder_info) + break if builder_info and builder_info[:detected] + end + if builder_info + builder = builder_info[:builder] + builder.build(values) + else + Arrow::StringArray.new(values) + end + end + + def buildable?(args) + args.size == method(:build).arity + end + + private + def detect_builder_info(value, builder_info) + case value + when nil + builder_info + when true, false + { + builder: BooleanArrayBuilder.new, + detected: true, + } + when String + { + builder: StringArrayBuilder.new, + detected: true, + } + when Symbol + { + builder: StringDictionaryArrayBuilder.new, + detected: true, + } + when Float + { + builder: DoubleArrayBuilder.new, + detected: true, + } + when Integer + if value < 0 + { + builder: IntArrayBuilder.new, + detected: true, + } + else + { + builder: UIntArrayBuilder.new, + } + end + when Time + data_type = value.data_type + case data_type.unit + when TimeUnit::SECOND + builder_info || { + builder: Time32ArrayBuilder.new(data_type) + } + when TimeUnit::MILLI + if builder_info and builder_info[:builder].is_a?(Time64ArrayBuilder) + builder_info + else + { + builder: Time32ArrayBuilder.new(data_type), + } + end + when TimeUnit::MICRO + { + builder: Time64ArrayBuilder.new(data_type), + } + when TimeUnit::NANO + { + builder: Time64ArrayBuilder.new(data_type), + detected: true + } + end + when ::Time + data_type = TimestampDataType.new(:nano) + { + builder: TimestampArrayBuilder.new(data_type), + detected: true, + } + when DateTime + { + builder: Date64ArrayBuilder.new, + detected: true, + } + when Date + { + builder: Date32ArrayBuilder.new, + detected: true, + } + when BigDecimal + if value.to_arrow.is_a?(Decimal128) + { + builder: Decimal128ArrayBuilder.new, + } + else + { + 
builder: Decimal256ArrayBuilder.new, + detected: true, + } + end + when ::Array + sub_builder_info = nil + value.each do |sub_value| + sub_builder_info = detect_builder_info(sub_value, sub_builder_info) + break if sub_builder_info and sub_builder_info[:detected] + end + if sub_builder_info and sub_builder_info[:detected] + sub_value_data_type = sub_builder_info[:builder].value_data_type + field = Field.new("item", sub_value_data_type) + { + builder: ListArrayBuilder.new(ListDataType.new(field)), + detected: true, + } + else + builder_info + end + else + { + builder: StringArrayBuilder.new, + detected: true, + } + end + end + end + + def build(values) + append(*values) + finish + end + + # @since 0.12.0 + def append(*values) + value_convertable = respond_to?(:convert_to_arrow_value, true) + start_index = 0 + current_index = 0 + status = :value + + values.each do |value| + if value.nil? + if status == :value + if start_index != current_index + target_values = values[start_index...current_index] + if value_convertable + target_values = target_values.collect do |v| + convert_to_arrow_value(v) + end + end + append_values(target_values, nil) + start_index = current_index + end + status = :null + end + else + if status == :null + append_nulls(current_index - start_index) + start_index = current_index + status = :value + end + end + current_index += 1 + end + if start_index != current_index + if status == :value + if start_index == 0 and current_index == values.size + target_values = values + else + target_values = values[start_index...current_index] + end + if value_convertable + target_values = target_values.collect do |v| + convert_to_arrow_value(v) + end + end + append_values(target_values, nil) + else + append_nulls(current_index - start_index) + end + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array.rb b/src/arrow/ruby/red-arrow/lib/arrow/array.rb new file mode 100644 index 000000000..c6c0daaec --- /dev/null +++ 
# b/src/arrow/ruby/red-arrow/lib/arrow/array.rb
# @@ -0,0 +1,234 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Array
    include Enumerable
    include GenericFilterable
    include GenericTakeable

    class << self
      # Creates an array.
      #
      # When this class has a builder class (see {builder_class}) and the
      # arguments match the arity of the builder's `build`, the array is
      # built by the builder. Otherwise the original constructor is used.
      def new(*args)
        _builder_class = builder_class
        return super if _builder_class.nil?
        return super unless _builder_class.buildable?(args)
        _builder_class.build(*args)
      end

      # @return [Class, nil] The constant named `"#{name}Builder"`, or
      #   `nil` when no such constant is defined.
      def builder_class
        builder_class_name = "#{name}Builder"
        return nil unless const_defined?(builder_class_name)
        const_get(builder_class_name)
      end
    end

    # @param i [Integer]
    #   The index of the value to be gotten.
    #
    #   You can specify negative index like for `::Array#[]`.
    #
    # @return [Object, nil]
    #   The `i`-th value.
    #
    #   `nil` for NULL value or out of range `i`.
    def [](i)
      i += length if i < 0
      return nil if i < 0 or i >= length
      if null?(i)
        nil
      else
        get_value(i)
      end
    end

    # @param other [Arrow::Array] The array to be compared.
    # @param options [Arrow::EqualOptions, Hash] (nil)
    #   The options to custom how to compare.
    #
    # @return [Boolean]
    #   `true` if both of them have the same data, `false` otherwise.
    #
    # @since 5.0.0
    def equal_array?(other, options=nil)
      equal_options(other, options)
    end

    # Yields each value (`nil` for NULL) from the first to the last.
    #
    # @return [Enumerator] When no block is given.
    def each
      return to_enum(__method__) unless block_given?

      length.times do |i|
        yield(self[i])
      end
    end

    # Yields each value (`nil` for NULL) from the last to the first.
    #
    # @return [Enumerator] When no block is given.
    def reverse_each
      return to_enum(__method__) unless block_given?

      (length - 1).downto(0) do |i|
        yield(self[i])
      end
    end

    # @return [self]
    def to_arrow
      self
    end

    alias_method :value_data_type_raw, :value_data_type
    # @return The value data type. The first result of the original
    #   `value_data_type` is memoized.
    def value_data_type
      @value_data_type ||= value_data_type_raw
    end

    # @return The result of `values`.
    def to_a
      values
    end

    alias_method :is_in_raw, :is_in
    # Same as the original `is_in` but also accepts a raw Ruby Array
    # (converted with `self.class.new` first) and an
    # {Arrow::ChunkedArray} (dispatched to `is_in_chunked_array`).
    def is_in(values)
      case values
      when ::Array
        if self.class.builder_class.buildable?([values])
          values = self.class.new(values)
        else
          values = self.class.new(value_data_type, values)
        end
        is_in_raw(values)
      when ChunkedArray
        is_in_chunked_array(values)
      else
        is_in_raw(values)
      end
    end

    # @api private
    alias_method :concatenate_raw, :concatenate
    # Concatenates the given other arrays to the array.
    #
    # @param other_arrays [::Array, Arrow::Array] The arrays to be
    #   concatenated.
    #
    #   Each other array is processed by {#resolve} before they're
    #   concatenated.
    #
    # @example Raw Ruby Array
    #   array = Arrow::Int32Array.new([1])
    #   array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
    #
    # @example Arrow::Array
    #   array = Arrow::Int32Array.new([1])
    #   array.concatenate(Arrow::Int32Array.new([2, 3]),
    #                     Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
    #
    # @since 4.0.0
    def concatenate(*other_arrays)
      other_arrays = other_arrays.collect do |other_array|
        resolve(other_array)
      end
      concatenate_raw(other_arrays)
    end

    # Concatenates the given other array to the array.
    #
    # If you have multiple arrays to be concatenated, you should use
    # {#concatenate} to concatenate multiple arrays at once.
    #
    # @param other_array [::Array, Arrow::Array] The array to be concatenated.
    #
    #   `other_array` is processed by {#resolve} before it's
    #   concatenated.
    #
    # @example Raw Ruby Array
    #   Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
    #
    # @example Arrow::Array
    #   Arrow::Int32Array.new([1]) +
    #     Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
    #
    # @since 4.0.0
    def +(other_array)
      concatenate(other_array)
    end

    # Ensures returning the same data type array from the given array.
    #
    # @return [Arrow::Array]
    #
    # @raise [ArgumentError] When this class has no builder class for a
    #   raw Ruby Array, or when the given object is neither a raw Ruby
    #   Array nor an object that responds to `value_data_type`.
    # @raise [NotImplementedError] When the builder class can't build
    #   from neither `(values)` nor `(value_data_type, values)`.
    #
    # @overload resolve(other_raw_array)
    #
    #   @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
    #     is built by the builder class of `self.class`.
    #
    #   @example Raw Ruby Array
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_array = int32_array.resolve([2, 3, 4])
    #     other_array # => Arrow::Int32Array.new([2, 3, 4])
    #
    # @overload resolve(other_array)
    #
    #   @param other_array [Arrow::Array] Another Arrow::Array.
    #
    #     If the given other array has the same data type as `self`,
    #     the given other array is returned as-is.
    #
    #     If the given other array doesn't have the same data type as
    #     `self`, the given other array is cast.
    #
    #   @example Same data type
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_int32_array = Arrow::Int32Array.new([2, 3, 4])
    #     other_array = int32_array.resolve(other_int32_array)
    #     other_array.object_id == other_int32_array.object_id
    #
    #   @example Other data type
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_int8_array = Arrow::Int8Array.new([2, 3, 4])
    #     other_array = int32_array.resolve(other_int8_array)
    #     other_array #=> Arrow::Int32Array.new([2, 3, 4])
    #
    # @since 4.0.0
    def resolve(other_array)
      if other_array.is_a?(::Array)
        builder_class = self.class.builder_class
        if builder_class.nil?
          message =
            "[array][resolve] can't build #{value_data_type} array " +
            "from raw Ruby Array"
          raise ArgumentError, message
        end
        if builder_class.buildable?([other_array])
          other_array = builder_class.build(other_array)
        elsif builder_class.buildable?([value_data_type, other_array])
          other_array = builder_class.build(value_data_type, other_array)
        else
          message =
            "[array][resolve] need to implement " +
            "a feature that building #{value_data_type} array " +
            "from raw Ruby Array"
          # The misspelled constant that used to be here didn't exist, so
          # this branch raised NameError instead of the intended error.
          raise NotImplementedError, message
        end
        other_array
      elsif other_array.respond_to?(:value_data_type)
        return other_array if value_data_type == other_array.value_data_type
        other_array.cast(value_data_type)
      else
        message =
          "[array][resolve] can't build #{value_data_type} array: " +
          "#{other_array.inspect}"
        raise ArgumentError, message
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb
# new file mode 100644
# index 000000000..338efe696
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb
# @@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
require "bigdecimal"

class BigDecimal
  # Converts to an Arrow decimal built from the string form of the
  # number.
  #
  # @return [Arrow::Decimal128, Arrow::Decimal256] `Arrow::Decimal128`
  #   when the precision fits in
  #   `Arrow::Decimal128DataType::MAX_PRECISION`, `Arrow::Decimal256`
  #   otherwise.
  def to_arrow
    fits_in_128 = precision <= Arrow::Decimal128DataType::MAX_PRECISION
    decimal_class = fits_in_128 ? Arrow::Decimal128 : Arrow::Decimal256
    decimal_class.new(to_s)
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb
# new file mode 100644
# index 000000000..6d05e2c41
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb
# @@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    private

    # @return [BinaryArrayBuilder] A new builder for the values.
    #   NOTE(review): presumably called by SymbolValuesAppendable; that
    #   module isn't visible here - confirm.
    def create_values_array_builder
      BinaryArrayBuilder.new
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb
# new file mode 100644
# index 000000000..ec236bd15
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb
# @@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module BlockClosable
    # Creates an object with `new(*args)`.
    #
    # Without a block, the object is returned and the caller is
    # responsible for closing it.
    #
    # With a block, the object is yielded and then closed even when the
    # block raises. Objects that respond to `closed?` are closed only
    # when they aren't closed yet.
    #
    # @param args [::Array] The arguments for `new`.
    # @yield [io] Gives the created object to the block.
    # @return [Object] The created object when no block is given, the
    #   value of the block otherwise.
    def open(*args, &block)
      io = new(*args)
      return io if block.nil?

      begin
        yield(io)
      ensure
        already_closed = io.respond_to?(:closed?) && io.closed?
        io.close unless already_closed
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb
# new file mode 100644
# index 000000000..9f3a3f61b
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb
# @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class Buffer
    class << self
      # @param value [Object] The conversion candidate.
      # @return [Buffer, nil] A new buffer created from `value` when it
      #   is a String, `nil` otherwise.
      #
      # @api private
      def try_convert(value)
        case value
        when String
          new(value)
        else
          nil
        end
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb
# new file mode 100644
# index 000000000..30dffa856
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb
# @@ -0,0 +1,91 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class ChunkedArray
    include Enumerable
    include GenericFilterable
    include GenericTakeable

    alias_method :size, :n_rows
    unless method_defined?(:length)
      alias_method :length, :n_rows
    end

    alias_method :chunks_raw, :chunks
    # @return The chunks. The first result of the original `chunks` is
    #   memoized.
    def chunks
      @chunks ||= chunks_raw
    end

    # @param i [Integer] The index over all chunks.
    #   Negative indexes aren't normalized by this method.
    # @return [Boolean, nil] Whether the `i`-th value is NULL, `nil` when
    #   `i` is past the last chunk.
    def null?(i)
      chunks.each do |array|
        return array.null?(i) if i < array.length
        i -= array.length
      end
      nil
    end

    # @param i [Integer] The index over all chunks.
    #   Negative indexes aren't normalized by this method.
    # @return [Boolean, nil] Whether the `i`-th value is valid, `nil`
    #   when `i` is past the last chunk.
    def valid?(i)
      chunks.each do |array|
        return array.valid?(i) if i < array.length
        i -= array.length
      end
      nil
    end

    # @param i [Integer] The index over all chunks. A negative index
    #   counts from the end like `::Array#[]`.
    # @return [Object, nil] The `i`-th value, `nil` for out of range `i`.
    def [](i)
      i += length if i < 0
      # Still negative: the index points before the first value. Without
      # this guard the first chunk would receive a negative index and
      # could resolve it against its own length.
      return nil if i < 0
      chunks.each do |array|
        return array[i] if i < array.length
        i -= array.length
      end
      nil
    end

    # Yields each value of each chunk from the first to the last.
    #
    # @return [Enumerator] When no block is given.
    def each(&block)
      return to_enum(__method__) unless block_given?

      chunks.each do |array|
        array.each(&block)
      end
    end

    # Yields each value of each chunk from the last to the first.
    #
    # @return [Enumerator] When no block is given.
    def reverse_each(&block)
      return to_enum(__method__) unless block_given?

      chunks.reverse_each do |array|
        array.reverse_each(&block)
      end
    end

    # Passes each chunk to the block (delegates to `chunks.each`).
    def each_chunk(&block)
      chunks.each(&block)
    end

    # Builds one array that has all the values of all chunks.
    #
    # Timestamp values are built by a {TimestampArrayBuilder} with the
    # data type of the first chunk. Other values are built by the class
    # of the first chunk.
    def pack
      first_chunk = chunks.first
      data_type = first_chunk.value_data_type
      case data_type
      when TimestampDataType
        builder = TimestampArrayBuilder.new(data_type)
        builder.build(to_a)
      else
        first_chunk.class.new(to_a)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb
# new file mode 100644
# index 000000000..7d7de66bd
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb
# @@ -0,0 +1,147 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module ColumnContainable
    # @return [::Array<Column>] One column per schema field. The result
    #   is memoized.
    def columns
      @columns ||= schema.n_fields.times.collect do |i|
        Column.new(self, i)
      end
    end

    # Passes each column to the block (delegates to `columns.each`).
    def each_column(&block)
      columns.each(&block)
    end

    # @overload find_column(name)
    #   Find a column that has the given name.
    #
    #   @param name [String, Symbol] The column name to be found.
    #   @return [Column, nil] The found column, `nil` when no column has
    #     the name.
    #
    # @overload find_column(index)
    #   Find the `index`-th column.
    #
    #   @param index [Integer] The index to be found. A negative index
    #     counts from the last column.
    #   @return [Column, nil] The found column, `nil` for out of range
    #     `index`.
    #
    # @raise [ArgumentError] When the argument is none of String, Symbol
    #   and Integer.
    def find_column(name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index.to_s
        index = schema.get_field_index(name)
        return nil if index == -1
        Column.new(self, index)
      when Integer
        index = name_or_index
        index += n_columns if index < 0
        return nil if index < 0 or index >= n_columns
        Column.new(self, index)
      else
        # Built with interpolation instead of String#<< so that it keeps
        # working when string literals are frozen.
        message =
          "column name or index must be String, Symbol or Integer: " +
          "#{name_or_index.inspect}"
        raise ArgumentError, message
      end
    end

    # Selects columns that are selected by `selectors` and/or `block`
    # and creates a new container only with the selected columns.
    #
    # @param selectors [Array<String, Symbol, Integer, Range>]
    #   If a selector is `String`, `Symbol` or `Integer`, the selector
    #   selects a column by {#find_column}.
    #
    #   If a selector is `Range`, the selector selects columns by `::Array#[]`.
    # @yield [column] Gives a column to the block to select columns.
    #   This uses `::Array#select`.
    # @yieldparam column [Column] A target column.
    # @yieldreturn [Boolean] Whether the given column is selected or not.
    # @return [self.class] The newly created container that only has selected
    #   columns.
    # @return [Enumerator] When neither selectors nor a block is given.
    # @raise [KeyError] When a `String` or `Symbol` selector names an
    #   unknown column.
    # @raise [IndexError] When an `Integer` selector is out of range.
    def select_columns(*selectors, &block)
      if selectors.empty?
        return to_enum(__method__) unless block_given?
        selected_columns = columns.select(&block)
      else
        selected_columns = []
        selectors.each do |selector|
          case selector
          when Range
            selected_columns.concat(columns[selector])
          else
            column = find_column(selector)
            if column.nil?
              case selector
              when String, Symbol
                message = "unknown column: #{selector.inspect}: #{inspect}"
                raise KeyError.new(message)
              else
                message =
                  "out of index (0..#{n_columns - 1}): " +
                  "#{selector.inspect}: #{inspect}"
                raise IndexError.new(message)
              end
            end
            selected_columns << column
          end
        end
        selected_columns = selected_columns.select(&block) if block_given?
      end
      self.class.new(selected_columns)
    end

    # @overload [](name)
    #   Find a column that has the given name.
    #
    #   @param name [String, Symbol] The column name to be found.
    #   @return [Column] The found column.
    #   @see #find_column
    #
    # @overload [](index)
    #   Find the `index`-th column.
    #
    #   @param index [Integer] The index to be found.
    #   @return [Column] The found column.
    #   @see #find_column
    #
    # @overload [](range)
    #   Selects columns that are in `range` and creates a new container
    #   only with the selected columns.
    #
    #   @param range [Range] The range to be selected.
    #   @return [self.class] The newly created container that only has selected
    #     columns.
    #   @see #select_columns
    #
    # @overload [](selectors)
    #   Selects columns that are selected by `selectors` and creates a
    #   new container only with the selected columns.
    #
    #   @param selectors [Array] The selectors that are used to select columns.
    #   @return [self.class] The newly created container that only has selected
    #     columns.
    #   @see #select_columns
    def [](selector)
      case selector
      when ::Array
        select_columns(*selector)
      when Range
        select_columns(selector)
      else
        find_column(selector)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column.rb b/src/arrow/ruby/red-arrow/lib/arrow/column.rb
# new file mode 100644
# index 000000000..06f3dbdc0
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/lib/arrow/column.rb
# @@ -0,0 +1,76 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
module Arrow
  # One column of a column container. The field comes from the schema of
  # the container and the data from `get_column_data` of the container.
  class Column
    include Enumerable

    attr_reader :container, :field, :data

    # @param container [Object] The owner of the column. It must provide
    #   `schema` (indexable by `index`) and `get_column_data(index)`.
    # @param index [Integer] The position of the column in `container`.
    def initialize(container, index)
      @container = container
      @index = index
      @field = container.schema[index]
      @data = container.get_column_data(index)
    end

    # @return The name of the field of this column.
    def name
      @field.name
    end

    # @return The data type of the field of this column.
    def data_type
      @field.data_type
    end

    # The following methods are plain delegations to the column data.

    def null?(i)
      @data.null?(i)
    end

    def valid?(i)
      @data.valid?(i)
    end

    def [](i)
      @data[i]
    end

    def each(&block)
      @data.each(&block)
    end

    def reverse_each(&block)
      @data.reverse_each(&block)
    end

    def n_rows
      @data.n_rows
    end
    alias_method :size, :n_rows
    alias_method :length, :n_rows

    def n_nulls
      @data.n_nulls
    end

    # Two columns are equal when `other` is an instance of the class of
    # this column and both the fields and the data are equal.
    def ==(other)
      return false unless other.is_a?(self.class)

      @field == other.field && @data == other.data
    end
  end
end
module Arrow
  class CompressionType
    # Maps a file extension (without leading dot) to its compression type.
    # `UNCOMPRESSED` has no extension, `GZIP` uses "gz" and every other
    # value uses its nick as the extension.
    EXTENSIONS = {}
    values.each do |type|
      extension =
        case type
        when UNCOMPRESSED
          next
        when GZIP
          "gz"
        else
          type.nick
        end
      EXTENSIONS[extension] = type
    end

    class << self
      # @param extension [#to_s] The file extension without leading dot.
      # @return [Arrow::CompressionType, nil] The compression type
      #   registered for `extension`, `nil` when nothing is registered.
      def resolve_extension(extension)
        EXTENSIONS[extension.to_s]
      end
    end
  end
end
module Arrow
  # Mixin that keeps the constructor arguments referenced from the
  # created object.
  module ConstructorArgumentsGCGuardable
    # Calls the next `initialize` in the ancestor chain with the given
    # arguments as is and then stores the positional arguments in
    # `@arguments`.
    #
    # NOTE(review): judging from the module name, the stored reference is
    # presumably there to keep the arguments from being garbage collected
    # while this object is alive -- confirm against the users of this
    # module.
    def initialize(*args)
      super
      @arguments = args
    end
  end
end
require "csv"
require "pathname"
require "time"

module Arrow
  # Loads CSV into a table. It first tries `CSVReader` and falls back to
  # the `csv` library of Ruby when the given options can't be mapped to
  # `CSVReadOptions` or when `CSVReader` fails.
  class CSVLoader
    class << self
      # Shortcut for `new(path_or_data, **options).load`.
      def load(path_or_data, **options)
        new(path_or_data, **options).load
      end
    end

    # @param path_or_data [Pathname, String] A path of a CSV file or CSV
    #   data. See {#load} for how they are distinguished.
    # @param options [Hash] The load options. `:delimiter` is renamed to
    #   `:col_sep` and `:compression` is taken out of the options here.
    def initialize(path_or_data, **options)
      @path_or_data = path_or_data
      @options = options
      if @options.key?(:delimiter)
        @options[:col_sep] = @options.delete(:delimiter)
      end
      @compression = @options.delete(:compression)
    end

    # Loads the CSV. A `Pathname` and a value that matches
    # `/\A.+\.csv\z/i` are processed as a path, everything else is
    # processed as CSV data.
    def load
      case @path_or_data
      when Pathname
        load_from_path(@path_or_data.to_path)
      when /\A.+\.csv\z/i
        load_from_path(@path_or_data)
      else
        load_data(@path_or_data)
      end
    end

    private
    # Opens `path` with `CSV.open` and yields the opened CSV.
    def open_csv(path, **options)
      CSV.open(path, **options) do |csv|
        yield(csv)
      end
    end

    # Creates a `CSV` for `data`, yields it and closes it in any case.
    def parse_csv_data(data, **options)
      csv = CSV.new(data, **options)
      begin
        yield(csv)
      ensure
        csv.close
      end
    end

    # Reads all rows of `csv` column-wise and builds a table from them.
    # Returns `nil` when `csv` has no values.
    def read_csv(csv)
      values_set = []
      csv.each do |row|
        if row.is_a?(CSV::Row)
          # CSV::Row yields [header, value] pairs: keep only the values.
          row = row.collect(&:last)
        end
        row.each_with_index do |value, i|
          values = (values_set[i] ||= [])
          values << value
        end
      end
      return nil if values_set.empty?

      arrays = values_set.collect.with_index do |values, i|
        ArrayBuilder.build(values)
      end
      if csv.headers
        names = csv.headers
      else
        # No header: the column index is used as the column name.
        names = arrays.size.times.collect(&:to_s)
      end
      raw_table = {}
      names.each_with_index do |name, i|
        raw_table[name] = arrays[i]
      end
      Table.new(raw_table)
    end

    # Converts `@options` to `CSVReadOptions`. Returns `nil` when an
    # option can't be expressed with `CSVReadOptions`: a `String`
    # `:headers` or a key that has no setter in `CSVReadOptions`.
    # The callers use the `csv` library in that case.
    def reader_options
      options = CSVReadOptions.new
      @options.each do |key, value|
        case key
        when :headers
          case value
          when ::Array
            options.column_names = value
          when String
            return nil
          else
            if value
              options.generate_column_names = false
            else
              options.generate_column_names = true
            end
          end
        when :column_types
          value.each do |name, type|
            options.add_column_type(name, type)
          end
        when :schema
          options.add_schema(value)
        when :encoding
          # process encoding on opening input
        when :col_sep
          options.delimiter = value
        else
          setter = "#{key}="
          if options.respond_to?(setter)
            options.__send__(setter, value)
          else
            return nil
          end
        end
      end
      options
    end

    # Yields `raw_input` wrapped in a decompressing stream when
    # `:compression` was given, `raw_input` itself otherwise.
    def open_decompress_input(raw_input)
      if @compression
        codec = Codec.new(@compression)
        CompressedInputStream.open(codec, raw_input) do |input|
          yield(input)
        end
      else
        yield(raw_input)
      end
    end

    # Yields `raw_input` wrapped in a stream that is built with a
    # `Gio::CharsetConverter.new("UTF-8", encoding)` converter when
    # `:encoding` was given, `raw_input` itself otherwise.
    def open_encoding_convert_stream(raw_input, &block)
      encoding = @options[:encoding]
      if encoding
        converter = Gio::CharsetConverter.new("UTF-8", encoding)
        convert_input_stream =
          Gio::ConverterInputStream.new(raw_input, converter)
        GIOInputStream.open(convert_input_stream, &block)
      else
        yield(raw_input)
      end
    end

    # Applies decompression and then encoding conversion to `raw_input`
    # and yields the result.
    def wrap_input(raw_input)
      open_decompress_input(raw_input) do |input_|
        open_encoding_convert_stream(input_) do |input__|
          yield(input__)
        end
      end
    end

    # Loads the CSV file at `path`.
    def load_from_path(path)
      options = reader_options
      if options
        begin
          MemoryMappedInputStream.open(path) do |raw_input|
            wrap_input(raw_input) do |input|
              return CSVReader.new(input, options).read
            end
          end
        rescue Arrow::Error::Invalid, Gio::Error
          # Ignored on purpose: fall through to the csv library below.
        end
      end

      options = update_csv_parse_options(@options, :open_csv, path)
      open_csv(path, **options) do |csv|
        read_csv(csv)
      end
    end

    # Loads the CSV in `data`.
    def load_data(data)
      options = reader_options
      if options
        begin
          BufferInputStream.open(Buffer.new(data)) do |raw_input|
            wrap_input(raw_input) do |input|
              return CSVReader.new(input, options).read
            end
          end
        rescue Arrow::Error::Invalid, Gio::Error
          # Ignored on purpose: fall through to the csv library below.
        end
      end

      options = update_csv_parse_options(@options, :parse_csv_data, data)
      parse_csv_data(data, **options) do |csv|
        read_csv(csv)
      end
    end

    # Creates a converter lambda that takes `(field, field_info)`.
    # It passes `field` to the given block only when `target_index` is
    # `nil` or `field_info.index` equals `target_index`. The other fields
    # are returned as is.
    def selective_converter(target_index)
      lambda do |field, field_info|
        if target_index.nil? or field_info.index == target_index
          yield(field)
        else
          field
        end
      end
    end

    # Converts "true" to `true` and "false" to `false`. The other fields
    # and fields that can't be encoded to `CSV::ConverterEncoding` are
    # returned as is.
    BOOLEAN_CONVERTER = lambda do |field|
      begin
        encoded_field = field.encode(CSV::ConverterEncoding)
      rescue EncodingError
        field
      else
        case encoded_field
        when "true"
          true
        when "false"
          false
        else
          field
        end
      end
    end

    # Converts a field with `::Time.iso8601`. Fields that can't be
    # encoded to `CSV::ConverterEncoding` and fields that
    # `::Time.iso8601` rejects with `ArgumentError` are returned as is.
    ISO8601_CONVERTER = lambda do |field|
      begin
        encoded_field = field.encode(CSV::ConverterEncoding)
      rescue EncodingError
        field
      else
        begin
          ::Time.iso8601(encoded_field)
        rescue ArgumentError
          field
        end
      end
    end

    # The names of the keyword parameters (`:key` type) of
    # `CSV#initialize` as keys.
    AVAILABLE_CSV_PARSE_OPTIONS = {}
    CSV.instance_method(:initialize).parameters.each do |type, name|
      AVAILABLE_CSV_PARSE_OPTIONS[name] = true if type == :key
    end

    # Creates options for the csv library based on `options`.
    #
    # `create_csv` is the name of the method that yields a CSV
    # (`:open_csv` or `:parse_csv_data`) and `args` are its positional
    # arguments. The method is used to read the CSV in advance for
    # detecting `:headers` and `:converters` when they aren't given.
    def update_csv_parse_options(options, create_csv, *args)
      if options.key?(:converters)
        new_options = options.dup
      else
        converters = [:all, BOOLEAN_CONVERTER, ISO8601_CONVERTER]
        new_options = options.merge(converters: converters)
      end

      # TODO: Support :schema and :column_types

      # Drop the options that CSV#initialize doesn't accept as keywords.
      unless AVAILABLE_CSV_PARSE_OPTIONS.empty?
        new_options.select! do |key, value|
          AVAILABLE_CSV_PARSE_OPTIONS.key?(key)
        end
      end

      unless options.key?(:headers)
        __send__(create_csv, *args, **new_options) do |csv|
          new_options[:headers] = have_header?(csv)
        end
      end
      unless options.key?(:converters)
        __send__(create_csv, *args, **new_options) do |csv|
          new_options[:converters] = detect_robust_converters(csv)
        end
      end

      new_options
    end

    # Guesses whether `csv` has a header row from its first two rows.
    #
    # Returns `@options[:headers]` when it's given. Otherwise:
    #
    #   * `false`: No row, the first row has `nil` or the first row has
    #     a value that isn't `String`.
    #   * `true`: The second row has `nil` or the classes of the values
    #     of the first two rows differ.
    #   * `nil`: No second row or none of the above applies.
    def have_header?(csv)
      if @options.key?(:headers)
        return @options[:headers]
      end

      row1 = csv.shift
      return false if row1.nil?
      return false if row1.any?(&:nil?)

      row2 = csv.shift
      return nil if row2.nil?
      return true if row2.any?(&:nil?)

      return false if row1.any? {|value| not value.is_a?(String)}

      if row1.collect(&:class) != row2.collect(&:class)
        return true
      end

      nil
    end

    # Reads all rows of `csv`, decides one type per column and returns
    # converters that are applied only to the matching column.
    #
    # A column becomes `:string` as soon as two values of it give
    # different types, except that a mix of integer and float becomes
    # `:float`. `nil` and empty strings don't affect the type. No
    # converter is created for `:string` columns and for columns without
    # a detected type.
    def detect_robust_converters(csv)
      column_types = []
      csv.each do |row|
        if row.is_a?(CSV::Row)
          each_value = Enumerator.new do |yielder|
            row.each do |_name, value|
              yielder << value
            end
          end
        else
          each_value = row.each
        end
        each_value.with_index do |value, i|
          current_column_type = column_types[i]
          next if current_column_type == :string

          candidate_type = nil
          case value
          when nil
            next
          when "true", "false", true, false
            candidate_type = :boolean
          when Integer
            candidate_type = :integer
            # An integer in a float column keeps the column float.
            if current_column_type == :float
              candidate_type = :float
            end
          when Float
            candidate_type = :float
            # A float in an integer column promotes the column to float.
            if current_column_type == :integer
              column_types[i] = candidate_type
            end
          when ::Time
            candidate_type = :time
          when DateTime
            candidate_type = :date_time
          when Date
            candidate_type = :date
          when String
            next if value.empty?
            candidate_type = :string
          else
            candidate_type = :string
          end

          column_types[i] ||= candidate_type
          if column_types[i] != candidate_type
            column_types[i] = :string
          end
        end
      end

      converters = []
      column_types.each_with_index do |type, i|
        case type
        when :boolean
          converters << selective_converter(i, &BOOLEAN_CONVERTER)
        when :integer
          converters << selective_converter(i) do |field|
            if field.nil? or field.empty?
              nil
            else
              CSV::Converters[:integer].call(field)
            end
          end
        when :float
          converters << selective_converter(i) do |field|
            if field.nil? or field.empty?
              nil
            else
              CSV::Converters[:float].call(field)
            end
          end
        when :time
          converters << selective_converter(i, &ISO8601_CONVERTER)
        when :date_time
          converters << selective_converter(i, &CSV::Converters[:date_time])
        when :date
          converters << selective_converter(i, &CSV::Converters[:date])
        end
      end
      converters
    end
  end
end
module Arrow
  class CSVReadOptions
    alias_method :add_column_type_raw, :add_column_type
    # Same as the raw `add_column_type` but `type` may be anything that
    # {Arrow::DataType.resolve} accepts.
    def add_column_type(name, type)
      add_column_type_raw(name, DataType.resolve(type))
    end

    alias_method :delimiter_raw, :delimiter
    # @return The raw delimiter converted with `chr`.
    def delimiter
      delimiter_raw.chr
    end

    alias_method :delimiter_raw=, :delimiter=
    # @param delimiter [String, Object] A 1 byte `String` is converted
    #   with `ord`. Any other value is passed to the raw setter as is.
    # @raise [ArgumentError] When a `String` isn't exactly 1 byte.
    def delimiter=(delimiter)
      if delimiter.is_a?(String)
        unless delimiter.bytesize == 1
          message = "delimiter must be 1 byte character: #{delimiter.inspect}"
          raise ArgumentError, message
        end
        delimiter = delimiter.ord
      end
      self.delimiter_raw = delimiter
    end
  end
end

# File: lib/arrow/data-type.rb
module Arrow
  class DataType
    class << self
      # Ensure returning suitable {Arrow::DataType}.
      #
      # @overload resolve(data_type)
      #
      #   Returns the given data type itself. This makes the method
      #   usable as a generic {Arrow::DataType} converter.
      #
      #   @param data_type [Arrow::DataType] The data type.
      #
      #   @return [Arrow::DataType] The given data type itself.
      #
      # @overload resolve(name)
      #
      #   Creates a suitable data type from the given type name, for
      #   example {Arrow::BooleanDataType} from `:boolean`.
      #
      #   @param name [String, Symbol] The type name of the data type.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve(:boolean)
      #
      # @overload resolve(name_with_arguments)
      #
      #   Creates a new suitable data type from the given type name
      #   with arguments.
      #
      #   @param name_with_arguments [::Array<String, ...>]
      #     The first element is the type name of the data type. The
      #     rest elements are passed to the constructor of the data
      #     type. For example, {Arrow::TimestampDataType} needs unit.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve([:boolean])
      #
      #   @example Create a milliseconds unit timestamp data type
      #     Arrow::DataType.resolve([:timestamp, :milli])
      #
      # @overload resolve(description)
      #
      #   Creates a new suitable data type from the given data type
      #   description.
      #
      #   Data type description is a raw `Hash` that must have `:type`
      #   value. The other values are passed to the constructor of the
      #   data type as one `Hash`. See the constructor document of each
      #   data type for the needed values. For example,
      #   {Arrow::ListDataType#initialize} needs `:field` value.
      #
      #   @param description [Hash] The description of the data type.
      #
      #   @option description [String, Symbol] :type The type name of
      #     the data type.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve(type: :boolean)
      #
      #   @example Create a list data type
      #     Arrow::DataType.resolve(type: :list,
      #                             field: {name: "visible", type: :boolean})
      #
      # @raise [ArgumentError] When the type is unknown, abstract or
      #   missing, or the argument has an unsupported class.
      def resolve(data_type)
        case data_type
        when DataType
          data_type
        when String, Symbol
          resolve_class(data_type).new
        when ::Array
          type, *arguments = data_type
          resolve_class(type).new(*arguments)
        when Hash
          # Normalize all keys to Symbol, then split :type from the rest.
          description = data_type.each_with_object({}) do |(key, value), result|
            result[key.to_sym] = value
          end
          type = description.delete(:type)
          if type.nil?
            message =
              "data type description must have :type value: #{data_type.inspect}"
            raise ArgumentError, message
          end
          data_type_class = resolve_class(type)
          return data_type_class.new if description.empty?
          data_type_class.new(description)
        else
          message =
            "data type must be " +
            "Arrow::DataType, String, Symbol, [String, ...], [Symbol, ...] " +
            "{type: String, ...} or {type: Symbol, ...}: #{data_type.inspect}"
          raise ArgumentError, message
        end
      end

      # @return [::Array<Class>] All direct and indirect sub classes,
      #   each class only once, in depth first order.
      def sub_types
        nested_types = gtype.children.flat_map do |child|
          sub_type = child.to_class
          [sub_type, *sub_type.sub_types]
        end
        nested_types.uniq
      end

      # Same as {resolve} but returns `nil` instead of raising
      # `ArgumentError`.
      def try_convert(value)
        resolve(value)
      rescue ArgumentError
        nil
      end

      private
      # Finds the concrete data type class for a type name such as
      # `:uint8` (`Arrow::UInt8DataType`).
      def resolve_class(data_type)
        camelized_name = data_type.to_s.split("_").collect(&:capitalize).join
        data_type_name = camelized_name.gsub(/\AUint/, "UInt")
        data_type_class_name = "#{data_type_name}DataType"
        unless Arrow.const_defined?(data_type_class_name)
          # List all known type names in the same form as the argument.
          class_names = Arrow.constants.collect(&:to_s).select do |name|
            name != "DataType" and name.end_with?("DataType")
          end
          available_types = class_names.collect do |name|
            base_name = name.gsub(/DataType\z/, "")
            words = base_name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
            words.collect(&:downcase).join("_").to_sym
          end
          message =
            "unknown type: <#{data_type.inspect}>: " +
            "available types: #{available_types.inspect}"
          raise ArgumentError, message
        end
        data_type_class = Arrow.const_get(data_type_class_name)
        if data_type_class.gtype.abstract?
          not_abstract_types = data_type_class.sub_types.reject do |sub_type|
            sub_type.gtype.abstract?
          end
          not_abstract_types = not_abstract_types.sort_by(&:name)
          message =
            "abstract type: <#{data_type.inspect}>: " +
            "use one of not abstract type: #{not_abstract_types.inspect}"
          raise ArgumentError, message
        end
        data_type_class
      end
    end

    # Builds an array of this data type from `values` with the array
    # builder whose name matches the class name of this data type.
    # The data type itself is passed to the builder as the first
    # argument when the builder can't build from `values` alone.
    def build_array(values)
      builder_name = "#{self.class.name.gsub(/DataType\z/, "")}ArrayBuilder"
      builder_class = self.class.const_get(builder_name)
      args = [values]
      args.unshift(self) unless builder_class.buildable?(args)
      builder_class.build(*args)
    end
  end
end
module Arrow
  class Date32ArrayBuilder
    private
    UNIX_EPOCH = Date.new(1970, 1, 1)

    # Converts a date like value to the number of days since
    # 1970-01-01. Values that respond to `to_date` are converted with it
    # first. Values that aren't `Date` after that are returned as is.
    def convert_to_arrow_value(value)
      date = value.respond_to?(:to_date) ? value.to_date : value
      return date unless date.is_a?(Date)

      (date - UNIX_EPOCH).to_i
    end
  end
end

# File: lib/arrow/date32-array.rb
module Arrow
  class Date32Array
    # @return [Date] The `i`-th raw value as `Date`.
    def get_value(i)
      to_date(get_raw_value(i))
    end

    private
    # 1970-01-01 as Julian day number.
    UNIX_EPOCH = 2440588

    # @param raw_value [Integer] The number of days since 1970-01-01.
    def to_date(raw_value)
      julian_day = UNIX_EPOCH + raw_value
      Date.jd(julian_day)
    end
  end
end
module Arrow
  class Date64ArrayBuilder
    private
    # Converts a time like value to milliseconds since the UNIX epoch.
    # Values that aren't `::Time` but respond to `to_time` are converted
    # with it first. Values that aren't `::Time` after that are returned
    # as is.
    def convert_to_arrow_value(value)
      time = value
      if value.respond_to?(:to_time) && !value.is_a?(::Time)
        time = value.to_time
      end
      return time unless time.is_a?(::Time)

      seconds_in_milli = time.to_i * 1_000
      sub_seconds_in_milli = time.usec / 1_000
      seconds_in_milli + sub_seconds_in_milli
    end
  end
end
module Arrow
  class Date64Array
    # @param i [Integer] The index of the value.
    # @return [DateTime] The `i`-th raw value as `DateTime`.
    def get_value(i)
      to_datetime(get_raw_value(i))
    end

    private
    # Converts milliseconds since the UNIX epoch to `DateTime`.
    #
    # @param raw_value [Integer] Milliseconds since the UNIX epoch.
    # @return [DateTime] The converted value.
    def to_datetime(raw_value)
      seconds, milliseconds = raw_value.divmod(1_000)
      # The second argument of Time.at is in microseconds by default,
      # so the millisecond remainder must be scaled. Passing it as is
      # made 1500 ms become 1.0005 s instead of 1.5 s.
      ::Time.at(seconds, milliseconds * 1_000).to_datetime
    end
  end
end
module Arrow
  class Datum
    class << self
      # Converts a value to a suitable {Arrow::Datum}.
      #
      # Arrow objects (table, array, chunked array and scalar) are
      # wrapped in the matching datum class. Ruby values are converted
      # to an Arrow array or scalar first and then wrapped.
      #
      # @param value [Object] The value to be converted.
      # @return [Arrow::Datum, nil] The converted datum, `nil` when
      #   `value` isn't convertible (including integers that don't fit
      #   in 64 bits).
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          # The narrowest type that can hold the value is chosen.
          # Unsigned types win for non negative values.
          case value
          when (0..((2 ** 8) - 1))
            try_convert(UInt8Scalar.new(value))
          when ((-(2 ** 7))..((2 ** 7) - 1))
            try_convert(Int8Scalar.new(value))
          when (0..((2 ** 16) - 1))
            try_convert(UInt16Scalar.new(value))
          when ((-(2 ** 15))..((2 ** 15) - 1))
            try_convert(Int16Scalar.new(value))
          when (0..((2 ** 32) - 1))
            try_convert(UInt32Scalar.new(value))
          when ((-(2 ** 31))..((2 ** 31) - 1))
            try_convert(Int32Scalar.new(value))
          when (0..((2 ** 64) - 1))
            try_convert(UInt64Scalar.new(value))
          when ((-(2 ** 63))..((2 ** 63) - 1))
            try_convert(Int64Scalar.new(value))
          else
            nil
          end
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          # ASCII only and UTF-8 strings are text, the others are binary.
          # The large variants are used when the data doesn't fit in
          # 32 bit offsets.
          if value.ascii_only? or value.encoding == Encoding::UTF_8
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(StringScalar.new(value))
            else
              try_convert(LargeStringScalar.new(value))
            end
          else
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(BinaryScalar.new(value))
            else
              try_convert(LargeBinaryScalar.new(value))
            end
          end
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # This branch used to be a copy of the ::Time branch: it called
          # value.nsec and passed a timestamp data type, so it always
          # raised.
          #
          # NOTE(review): a bare Decimal128 doesn't tell its precision
          # and scale here, so the maximum precision with scale 0 is
          # used, which keeps the raw integer value -- confirm this is
          # the wanted default.
          data_type =
            Decimal128DataType.new(Decimal128DataType::MAX_PRECISION, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end
    end
  end
end
# File: lib/arrow/decimal128-array-builder.rb
module Arrow
  class Decimal128ArrayBuilder
    # Creates a builder for the data type and builds the values with it.
    #
    # @param data_type [Object] Passed to `new` as is.
    # @param values [Object] Passed to the builder's `build` as is.
    # @return [Object] The result of `build`.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value
    # Appends a value after normalizing it.
    #
    # `String`, `Float` and `BigDecimal` values are converted to
    # {Arrow::Decimal128}. Other values are appended as is.
    def append_value(value)
      normalized = normalize_value(value)
      append_value_raw(normalized)
    end

    alias_method :append_values_raw, :append_values
    # Appends values.
    #
    # A Ruby `::Array` is normalized element by element and appended by
    # the original `append_values`. Anything else is delegated to
    # `append_values_packed` untouched.
    def append_values(values, is_valids=nil)
      unless values.is_a?(::Array)
        return append_values_packed(values, is_valids)
      end

      normalized = values.map {|value| normalize_value(value)}
      append_values_raw(normalized, is_valids)
    end

    private
    # Converts a value to {Arrow::Decimal128} when it's `String`,
    # `Float` or `BigDecimal`. `Float` and `BigDecimal` go through
    # their string representation.
    def normalize_value(value)
      case value
      when String
        Decimal128.new(value)
      when Float, BigDecimal
        Decimal128.new(value.to_s)
      else
        value
      end
    end
  end
end
# File: lib/arrow/decimal128-array.rb
module Arrow
  class Decimal128Array
    # @param i [Integer] The index of the target value.
    # @return [BigDecimal] The value built from `format_value(i)`.
    def get_value(i)
      BigDecimal(format_value(i))
    end
  end
end

# File: lib/arrow/decimal128-data-type.rb
module Arrow
  class Decimal128DataType
    MAX_PRECISION = max_precision

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Decimal128DataType}.
    #
    # @overload initialize(precision, scale)
    #
    #   @param precision [Integer] The precision of the decimal data
    #     type. It's the number of digits including the number of
    #     digits after the decimal point.
    #
    #   @param scale [Integer] The scale of the decimal data
    #     type. It's the number of digits after the decimal point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(8, 2)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the decimal data
    #     type. It must have `:precision` and `:scale` values.
    #
    #   @option description [Integer] :precision The precision of the
    #     decimal data type. It's the number of digits including the
    #     number of digits after the decimal point.
    #
    #   @option description [Integer] :scale The scale of the decimal
    #     data type. It's the number of digits after the decimal
    #     point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(precision: 8,
    #                                   scale: 2)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        precision = description[:precision]
        scale = description[:scale]
      when 2
        precision, scale = args
      else
        # Same wording as Ruby's own arity error: no comma after "given".
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(precision, scale)
    end
  end
end
# File: lib/arrow/decimal128.rb
module Arrow
  class Decimal128
    alias_method :to_s_raw, :to_s

    # @overload to_s
    #
    #   @return [String]
    #     The string representation of the decimal.
    #
    # @overload to_s(scale)
    #
    #   @param scale [Integer] The scale of the decimal.
    #   @return [String]
    #      The string representation of the decimal including the scale.
    #
    # @since 0.13.0
    def to_s(scale=nil)
      # Without a scale, fall back to the original `to_s`.
      return to_s_raw unless scale

      to_string_scale(scale)
    end

    alias_method :abs!, :abs

    # Calls `abs!` on a duplicate of the receiver and returns the
    # duplicate.
    #
    # @since 3.0.0
    def abs
      dup.tap {|duplicated| duplicated.abs!}
    end

    alias_method :negate!, :negate

    # Calls `negate!` on a duplicate of the receiver and returns the
    # duplicate.
    #
    # @since 3.0.0
    def negate
      dup.tap {|duplicated| duplicated.negate!}
    end
  end
end
# File: lib/arrow/decimal256-array-builder.rb
module Arrow
  class Decimal256ArrayBuilder
    # Creates a builder for the data type and builds the values with it.
    #
    # @param data_type [Object] Passed to `new` as is.
    # @param values [Object] Passed to the builder's `build` as is.
    # @return [Object] The result of `build`.
    #
    # @since 3.0.0
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value
    # Appends a value after normalizing it.
    #
    # `String`, `Float` and `BigDecimal` values are converted to
    # {Arrow::Decimal256}. Other values are appended as is.
    #
    # @since 3.0.0
    def append_value(value)
      normalized = normalize_value(value)
      append_value_raw(normalized)
    end

    alias_method :append_values_raw, :append_values
    # Appends values.
    #
    # A Ruby `::Array` is normalized element by element and appended by
    # the original `append_values`. Anything else is delegated to
    # `append_values_packed` untouched.
    #
    # @since 3.0.0
    def append_values(values, is_valids=nil)
      unless values.is_a?(::Array)
        return append_values_packed(values, is_valids)
      end

      normalized = values.map {|value| normalize_value(value)}
      append_values_raw(normalized, is_valids)
    end

    private
    # Converts a value to {Arrow::Decimal256} when it's `String`,
    # `Float` or `BigDecimal`. `Float` and `BigDecimal` go through
    # their string representation.
    def normalize_value(value)
      case value
      when String
        Decimal256.new(value)
      when Float, BigDecimal
        Decimal256.new(value.to_s)
      else
        value
      end
    end
  end
end
# File: lib/arrow/decimal256-array.rb
module Arrow
  class Decimal256Array
    # @param i [Integer] The index of the target value.
    # @return [BigDecimal] The value built from `format_value(i)`.
    #
    # @since 3.0.0
    def get_value(i)
      BigDecimal(format_value(i))
    end
  end
end

# File: lib/arrow/decimal256-data-type.rb
module Arrow
  class Decimal256DataType
    MAX_PRECISION = max_precision

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Decimal256DataType}.
    #
    # @overload initialize(precision, scale)
    #
    #   @param precision [Integer] The precision of the decimal data
    #     type. It's the number of digits including the number of
    #     digits after the decimal point.
    #
    #   @param scale [Integer] The scale of the decimal data
    #     type. It's the number of digits after the decimal point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal256DataType.new(8, 2)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the decimal data
    #     type. It must have `:precision` and `:scale` values.
    #
    #   @option description [Integer] :precision The precision of the
    #     decimal data type. It's the number of digits including the
    #     number of digits after the decimal point.
    #
    #   @option description [Integer] :scale The scale of the decimal
    #     data type. It's the number of digits after the decimal
    #     point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal256DataType.new(precision: 8,
    #                                   scale: 2)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    #
    # @since 3.0.0
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        precision = description[:precision]
        scale = description[:scale]
      when 2
        precision, scale = args
      else
        # Same wording as Ruby's own arity error: no comma after "given".
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(precision, scale)
    end
  end
end
# File: lib/arrow/decimal256.rb
module Arrow
  class Decimal256
    alias_method :to_s_raw, :to_s

    # @overload to_s
    #
    #   @return [String]
    #     The string representation of the decimal.
    #
    # @overload to_s(scale)
    #
    #   @param scale [Integer] The scale of the decimal.
    #   @return [String]
    #      The string representation of the decimal including the scale.
    #
    # @since 3.0.0
    def to_s(scale=nil)
      # Without a scale, fall back to the original `to_s`.
      return to_s_raw unless scale

      to_string_scale(scale)
    end

    alias_method :abs!, :abs

    # Calls `abs!` on a duplicate of the receiver and returns the
    # duplicate.
    #
    # @since 3.0.0
    def abs
      dup.tap {|duplicated| duplicated.abs!}
    end

    alias_method :negate!, :negate

    # Calls `negate!` on a duplicate of the receiver and returns the
    # duplicate.
    #
    # @since 3.0.0
    def negate
      dup.tap {|duplicated| duplicated.negate!}
    end
  end
end
# File: lib/arrow/dense-union-data-type.rb
module Arrow
  class DenseUnionDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DenseUnionDataType}.
    #
    # @overload initialize(fields, type_codes)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     dense union data type. You can mix {Arrow::Field} and field
    #     description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @param type_codes [::Array<Integer>] The IDs that indicates
    #     corresponding fields.
    #
    #   @example Create a dense union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields, [2, 9])
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dense union
    #     data type. It must have `:fields` and `:type_codes` values.
    #
    #   @option description [::Array<Arrow::Field, Hash>] :fields The
    #     fields of the dense union data type. You can mix
    #     {Arrow::Field} and field description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @option description [::Array<Integer>] :type_codes The IDs
    #     that indicates corresponding fields.
    #
    #   @example Create a dense union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields: fields,
    #                                   type_codes: [2, 9])
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        fields = description[:fields]
        type_codes = description[:type_codes]
      when 2
        fields, type_codes = args
      else
        # Same wording as Ruby's own arity error: no comma after "given".
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Field descriptions are converted to Arrow::Field.
      # Arrow::Field objects are used as is.
      fields = fields.map do |field|
        field.is_a?(Field) ? field : Field.new(field)
      end
      initialize_raw(fields, type_codes)
    end
  end
end
# File: lib/arrow/dictionary-array.rb
module Arrow
  class DictionaryArray
    # @param i [Integer] The index of the target value.
    # @return [Object] The dictionary entry referred by the `i`-th index.
    def get_value(i)
      dictionary[indices[i]]
    end
  end
end

# File: lib/arrow/dictionary-data-type.rb
module Arrow
  class DictionaryDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DictionaryDataType}.
    #
    # @overload initialize(index_data_type, value_data_type, ordered)
    #
    #   @param index_data_type [Arrow::DataType, Hash, String, Symbol]
    #     The index data type of the dictionary data type. It must be
    #     signed integer data types. Here are available signed integer
    #     data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param value_data_type [Arrow::DataType, Hash, String, Symbol]
    #     The value data type of the dictionary data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param ordered [Boolean] Whether dictionary contents are
    #     ordered or not.
    #
    #   @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
    #     index_data_type = :int8
    #     value_data_type = :string
    #     ordered = true
    #     Arrow::DictionaryDataType.new(index_data_type,
    #                                   value_data_type,
    #                                   ordered)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dictionary
    #     data type. It must have `:index_data_type`,
    #     `:value_data_type` and `:ordered` values.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :index_data_type The index data type of the dictionary data
    #     type. It must be signed integer data types. Here are
    #     available signed integer data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :value_data_type
    #     The value data type of the dictionary data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Boolean] :ordered Whether dictionary
    #     contents are ordered or not.
    #
    #   @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
    #     Arrow::DictionaryDataType.new(index_data_type: :int8,
    #                                   value_data_type: :string,
    #                                   ordered: true)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 3.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        index_data_type = description[:index_data_type]
        value_data_type = description[:value_data_type]
        ordered = description[:ordered]
      when 3
        index_data_type, value_data_type, ordered = args
      else
        # Same wording as Ruby's own arity error: no comma after "given".
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
      index_data_type = DataType.resolve(index_data_type)
      value_data_type = DataType.resolve(value_data_type)
      initialize_raw(index_data_type, value_data_type, ordered)
    end
  end
end
# File: lib/arrow/equal-options.rb
module Arrow
  class EqualOptions
    class << self
      # Tries to convert a `Hash` to equal options.
      #
      # Each key is assigned by the `#{key}=` setter of a new options
      # object.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::EqualOptions, nil] The options. `nil` when
      #   `value` isn't a `Hash` or when the options don't respond to
      #   the setter for one of the keys.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |k, v|
            setter = :"#{k}="
            # An unknown key makes the whole conversion fail.
            return unless options.respond_to?(setter)
            options.__send__(setter, v)
          end
          options
        else
          nil
        end
      end
    end
  end
end

# File: lib/arrow/expression.rb
module Arrow
  class Expression
    class << self
      # Tries to convert a value to an expression.
      #
      # * `Symbol`: a field expression for the name.
      # * `::Array`: a call expression. The first element is the
      #   function name (`String` or `Symbol`) and the rest are the
      #   arguments. If the last argument is a function options
      #   object, it's used as the options of the call.
      # * Others: a literal expression of the datum converted by
      #   {Arrow::Datum.try_convert}.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::Expression, nil] The expression. `nil` when
      #   the function name is neither `String` nor `Symbol` or when
      #   the value can't be converted to a datum.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol
          FieldExpression.new(value.to_s)
        when ::Array
          function_name, *arguments = value
          case function_name
          when String, Symbol
            function_name = function_name.to_s
          else
            return nil
          end
          if arguments.last.is_a?(FunctionOptions)
            options = arguments.pop
          else
            options = nil
          end
          CallExpression.new(function_name, arguments, options)
        else
          datum = Datum.try_convert(value)
          return nil if datum.nil?
          LiteralExpression.new(datum)
        end
      end
    end
  end
end

# File: lib/arrow/field-containable.rb
module Arrow
  module FieldContainable
    # Finds a field by name or index.
    #
    # The including class must provide `get_field_by_name`,
    # `get_field` and `n_fields`.
    #
    # @param name_or_index [String, Symbol, Integer] The name or the
    #   index of the target field. A negative index counts from the
    #   last field: `-1` is the last field.
    #
    # @return [Object, nil] The result of `get_field_by_name` for a
    #   name and the result of `get_field` for an index. `nil` when
    #   the index is out of range.
    #
    # @raise [ArgumentError] If `name_or_index` is none of `String`,
    #   `Symbol` and `Integer`.
    def find_field(name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index
        get_field_by_name(name)
      when Integer
        index = name_or_index
        # A negative index is relative to the end. It's still
        # negative after the adjustment if it's out of range.
        index += n_fields if index < 0
        return nil if index < 0 || index >= n_fields
        get_field(index)
      else
        message = "field name or index must be String, Symbol or Integer" +
                  ": <#{name_or_index.inspect}>"
        raise ArgumentError, message
      end
    end
  end
end
# File: lib/arrow/field.rb
module Arrow
  class Field
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Field}.
    #
    # @overload initialize(name, data_type)
    #
    #   @param name [String, Symbol] The name of the field.
    #
    #   @param data_type [Arrow::DataType, Hash, String, Symbol] The
    #     data type of the field.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a field with {Arrow::DataType}s
    #     Arrow::Field.new("visible", Arrow::BooleanDataType.new)
    #
    #   @example Create a field with data type description
    #     Arrow::Field.new("visible", :boolean)
    #
    #   @example Create a field with name as `Symbol`
    #     Arrow::Field.new(:visible, :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the field.
    #
    #     Field description is a raw `Hash`. Field description must
    #     have `:name` and `:data_type` values. `:name` is the name of
    #     the field. `:data_type` is the data type of the field. You
    #     can use {Arrow::DataType} or data type description as
    #     `:data_type` value.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #     There is a shortcut for convenience. If field description
    #     doesn't have `:data_type`, all keys except `:name` are
    #     processed as data type description. For example, the
    #     following field descriptions are the same:
    #
    #     ```ruby
    #     {name: "visible", data_type: {type: :boolean}}
    #     {name: "visible", type: :boolean} # Shortcut version
    #     ```
    #
    #   @option description [String, Symbol] :name The name of the field.
    #
    #   @option description [Arrow::DataType, Hash] :data_type The
    #     data type of the field. You can specify data type description
    #     by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a field with {Arrow::DataType}s
    #     Arrow::Field.new(name: "visible",
    #                      data_type: Arrow::BooleanDataType.new)
    #
    #   @example Create a field with data type description
    #     Arrow::Field.new(name: "visible", data_type: {type: :boolean})
    #
    #   @example Create a field with shortcut form
    #     Arrow::Field.new(name: "visible", type: :boolean)
    def initialize(*args)
      name = nil
      data_type = nil
      case args.size
      when 1
        # Keys other than :name and :data_type are collected as a
        # data type description (shortcut form).
        shortcut_description = {}
        args.first.each do |key, value|
          normalized_key = key.to_sym
          case normalized_key
          when :name
            name = value
          when :data_type
            data_type = DataType.resolve(value)
          else
            shortcut_description[normalized_key] = value
          end
        end
        # The shortcut form is used only when :data_type isn't given.
        data_type ||= DataType.resolve(shortcut_description)
      when 2
        name, data_type_source = args
        data_type = DataType.resolve(data_type_source)
      else
        raise ArgumentError,
              "wrong number of arguments (given #{args.size}, expected 1..2)"
      end

      initialize_raw(name, data_type)
    end
  end
end
# File: lib/arrow/file-output-stream.rb
module Arrow
  class FileOutputStream
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new file output stream.
    #
    # @param path [Object] Passed to the original initializer as is.
    #
    # @param options [Boolean, Hash] The append flag itself
    #   (`true`/`false`) or a `Hash` whose `:append` value is the
    #   append flag. Anything else is ignored.
    #
    #   The append flag is `false` when it isn't specified.
    def initialize(path, options={})
      append =
        case options
        when true, false
          options
        when Hash
          options[:append]
        end
      # No flag (unsupported options or missing :append) means
      # "don't append".
      initialize_raw(path, append.nil? ? false : append)
    end
  end
end
module Arrow
  class FileSystem
    alias_method :open_output_stream_raw, :open_output_stream

    # Opens an output stream for `path` through the raw binding.
    #
    # Without a block, the stream is returned and the caller is
    # responsible for it.
    #
    # With a block, the stream is yielded, `#close` is called on it
    # when the block finishes (also when the block raises), and the
    # block's value is returned.
    #
    # @param path The path forwarded to the raw `open_output_stream`.
    def open_output_stream(path)
      stream = open_output_stream_raw(path)
      return stream unless block_given?

      begin
        yield(stream)
      ensure
        stream.close
      end
    end
  end
end
module Arrow
  class FixedSizeBinaryArrayBuilder
    # Creates a builder for `data_type` and returns whatever its
    # `#build` returns for `values`.
    #
    # @since 3.0.0
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_values_raw, :append_values

    # Appends many values at once.
    #
    # A Ruby `::Array` is handed to the raw `append_values`; any other
    # object is handed to `append_values_packed`.
    #
    # @param values [::Array, Object] The values to append.
    # @param is_valids The validity flags, forwarded unchanged.
    #
    # @since 3.0.0
    def append_values(values, is_valids=nil)
      return append_values_raw(values, is_valids) if values.is_a?(::Array)

      append_values_packed(values, is_valids)
    end
  end
end
module Arrow
  class FixedSizeBinaryArray
    alias_method :get_value_raw, :get_value

    # Returns the `i`-th value after calling `#to_s` on what the raw
    # getter returns.
    #
    # @param i The index forwarded to the raw `get_value`.
    #
    # @since 3.0.0
    def get_value(i)
      raw_value = get_value_raw(i)
      raw_value.to_s
    end
  end
end
module Arrow
  # Mix-in that wraps the including class's `#filter` so that it also
  # accepts Ruby arrays and chunked arrays.
  module GenericFilterable
    # Keeps the including class's own `#filter` reachable as
    # `#filter_raw` and makes `#filter` an alias of {#filter_generic}.
    # The class must already define `#filter` when it includes this
    # module.
    def self.included(base)
      base.__send__(:alias_method, :filter_raw, :filter)
      base.__send__(:alias_method, :filter, :filter_generic)
    end

    # Normalizes `filter` and delegates to `#filter_raw`.
    #
    # @param filter [::Array, ChunkedArray, Object]
    #   * `::Array`: wrapped in a `BooleanArray`.
    #   * `ChunkedArray`: passed to `#filter_chunked_array` when the
    #     receiver responds to it, otherwise packed with `#pack`.
    #   * Anything else: used as is.
    # @param options The options forwarded unchanged.
    def filter_generic(filter, options=nil)
      case filter
      when ::Array
        filter = BooleanArray.new(filter)
      when ChunkedArray
        if respond_to?(:filter_chunked_array)
          return filter_chunked_array(filter, options)
        end
        # TODO: Implement this in C++
        filter = filter.pack
      end
      filter_raw(filter, options)
    end
  end
end
module Arrow
  # Mix-in that wraps the including class's `#take` so that it also
  # accepts Ruby arrays and chunked arrays as indices.
  module GenericTakeable
    # Keeps the including class's own `#take` reachable as `#take_raw`
    # and makes `#take` an alias of {#take_generic}. The class must
    # already define `#take` when it includes this module.
    def self.included(base)
      base.__send__(:alias_method, :take_raw, :take)
      base.__send__(:alias_method, :take, :take_generic)
    end

    # Normalizes `indices` and delegates to the matching taker.
    #
    # @param indices [::Array, ChunkedArray, Object]
    #   * `::Array`: built with `IntArrayBuilder.build` and passed to
    #     `#take_raw`.
    #   * `ChunkedArray`: passed to `#take_chunked_array`.
    #   * Anything else: passed to `#take_raw` as is.
    def take_generic(indices)
      return take_raw(IntArrayBuilder.build(indices)) if indices.is_a?(::Array)
      return take_chunked_array(indices) if indices.is_a?(ChunkedArray)

      take_raw(indices)
    end
  end
end
module Arrow
  # Runs hash aggregations over a table grouped by a set of keys.
  class Group
    # @param table The table to aggregate. Its `#columns` are used to
    #   pick default aggregation targets.
    # @param keys The grouping keys. Columns whose name equals
    #   `key.to_s` for one of them are never aggregation targets.
    def initialize(table, keys)
      @table = table
      @keys = keys
    end

    # Each shortcut below aggregates with one function. With no
    # `target_names` the bare function name is used, which expands to
    # the function's default target columns; otherwise one
    # `"function(name)"` aggregation is built per name.

    def count(*target_names)
      aggregate(*build_aggregations("hash_count", target_names))
    end

    def sum(*target_names)
      aggregate(*build_aggregations("hash_sum", target_names))
    end

    def product(*target_names)
      aggregate(*build_aggregations("hash_product", target_names))
    end

    def mean(*target_names)
      aggregate(*build_aggregations("hash_mean", target_names))
    end

    def min(*target_names)
      aggregate(*build_aggregations("hash_min", target_names))
    end

    def max(*target_names)
      aggregate(*build_aggregations("hash_max", target_names))
    end

    def stddev(*target_names)
      aggregate(*build_aggregations("hash_stddev", target_names))
    end

    def variance(*target_names)
      aggregate(*build_aggregations("hash_variance", target_names))
    end

    # Normalizes the given aggregations, runs them through an execute
    # plan (source node -> aggregate node -> sink node) and returns
    # everything the sink's reader yields.
    #
    # @param aggregation [:all, String, Object] An aggregation. See
    #   `#normalize_aggregations` for the accepted shortcuts.
    # @param more_aggregations Additional aggregations.
    def aggregate(aggregation, *more_aggregations)
      normalized_aggregations =
        normalize_aggregations([aggregation, *more_aggregations])
      plan = ExecutePlan.new
      source_node = plan.build_source_node(@table)
      aggregate_node_options = {
        aggregations: normalized_aggregations,
        keys: @keys
      }
      aggregate_node =
        plan.build_aggregate_node(source_node, aggregate_node_options)
      sink_node_options = SinkNodeOptions.new
      plan.build_sink_node(aggregate_node, sink_node_options)
      plan.validate
      plan.start
      plan.wait
      sink_node_options.get_reader(aggregate_node.output_schema).read_all
    end

    private
    # Returns `[function_name]` when no targets are given, otherwise
    # one `"function_name(target)"` string per target.
    def build_aggregations(function_name, target_names)
      return [function_name] if target_names.empty?

      target_names.collect { |name| "#{function_name}(#{name})" }
    end

    # Expands aggregation shortcuts:
    #
    # * `:all`: every function in the built-in list, each expanded
    #   like a bare function name.
    # * `"function(input)"`: `{function:, input:}` with the input
    #   stripped of surrounding whitespace.
    # * `"count"`/`"hash_count"`: one entry per target column.
    # * `"any"`/`"hash_any"`/`"all"`/`"hash_all"`: one entry per
    #   boolean target column.
    # * Any other `String`: one entry per numeric target column.
    # * Anything else: kept as is.
    def normalize_aggregations(aggregations)
      aggregations.flat_map do |aggregation|
        case aggregation
        when :all
          all_functions = [
            "hash_count",
            "hash_sum",
            "hash_product",
            "hash_mean",
            "hash_stddev",
            "hash_variance",
            # "hash_tdigest",
            "hash_min",
            "hash_max",
            "hash_any",
            "hash_all",
          ]
          normalize_aggregations(all_functions)
        when /\A([a-zA-Z0-9_].+?)\((.+?)\)\z/
          [{function: $1, input: $2.strip}]
        when "count", "hash_count"
          expand_aggregation(aggregation, target_columns)
        when "any", "hash_any", "all", "hash_all"
          expand_aggregation(aggregation, boolean_target_columns)
        when String
          expand_aggregation(aggregation, numeric_target_columns)
        else
          # Wrapped so that flat_map keeps the object itself.
          [aggregation]
        end
      end
    end

    # Builds one `{function:, input:}` entry per column.
    def expand_aggregation(function, columns)
      columns.collect do |column|
        {function: function, input: column.name}
      end
    end

    def target_columns
      @target_columns ||= find_target_columns
    end

    # All table columns except the grouping keys.
    def find_target_columns
      key_names = @keys.collect(&:to_s)
      @table.columns.reject do |column|
        key_names.include?(column.name)
      end
    end

    def boolean_target_columns
      @boolean_target_columns ||= find_boolean_target_columns
    end

    def find_boolean_target_columns
      target_columns.select do |column|
        column.data_type.is_a?(BooleanDataType)
      end
    end

    def numeric_target_columns
      @numeric_target_columns ||= find_numeric_target_columns
    end

    def find_numeric_target_columns
      target_columns.select do |column|
        column.data_type.is_a?(NumericDataType)
      end
    end
  end
end
module Arrow
  class ListArrayBuilder
    # Creates a builder for `data_type` and returns whatever its
    # `#build` returns for `values`.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts a new list record. The list's elements must then be
    #   appended through {#value_builder}.
    #
    # @overload append_value(list)
    #
    #   Appends a complete list record.
    #
    #   @param value [nil, ::Array] The list value of the record.
    #
    #     `nil` appends a null record.
    #
    #     An `Array` starts a record and appends its elements to the
    #     value builder.
    #
    # @raise [ArgumentError] If more than one argument is given or
    #   the argument is neither `nil` nor an `Array`.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      value = args[0]
      case value
      when nil
        append_null
      when ::Array
        append_value_raw
        # The value builder is looked up once and then reused.
        @value_builder ||= value_builder
        @value_builder.append(*value)
      else
        message = "list value must be nil or Array: #{value.inspect}"
        raise ArgumentError, message
      end
    end

    # Appends one record per list.
    #
    # @param lists The list values, each as accepted by
    #   {#append_value}.
    # @param is_valids When given, it drives the iteration: a truthy
    #   flag appends `lists[i]` and a falsy flag appends a null.
    def append_values(lists, is_valids=nil)
      return lists.each { |list| append_value(list) } unless is_valids

      is_valids.each_with_index do |is_valid, i|
        is_valid ? append_value(lists[i]) : append_null
      end
    end

    # @since 0.12.0
    def append(*values)
      return super unless values.empty?

      # For backward compatibility
      append_value
    end
  end
end
module Arrow
  class ListDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::ListDataType}.
    #
    # @overload initialize(field)
    #
    #   @param field [Arrow::Field, Hash] The field of the list data
    #     type. A `Hash` is taken as a field description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(name: "visible", type: :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the list data
    #     type. It must have a `:field` value.
    #
    #   @option description [Arrow::Field, Hash] :field The field of
    #     the list data type. A `Hash` is taken as a field
    #     description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(field: visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
    #
    # @overload initialize(data_type)
    #
    #   @param data_type [Arrow::DataType, String, Symbol,
    #     ::Array<String>, ::Array<Symbol>, Hash] The element data
    #     type of the list data type. The field is created
    #     automatically with the default name `"item"`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type.
    #
    #   @example Create a list data type with {Arrow::DataType}
    #     Arrow::ListDataType.new(Arrow::BooleanDataType.new)
    #
    #   @example Create a list data type with data type name as String
    #     Arrow::ListDataType.new("boolean")
    #
    #   @example Create a list data type with data type name as Symbol
    #     Arrow::ListDataType.new(:boolean)
    #
    #   @example Create a list data type with data type as Array
    #     Arrow::ListDataType.new([:time32, :milli])
    def initialize(arg)
      data_type = resolve_data_type(arg)
      field =
        if data_type
          Field.new(default_field_name, data_type)
        else
          resolve_field(arg)
        end
      initialize_raw(field)
    end

    private
    # Returns the resolved element data type when `arg` describes a
    # data type, `nil` when it describes a field instead.
    def resolve_data_type(arg)
      case arg
      when DataType, String, Symbol, ::Array
        DataType.resolve(arg)
      when Hash
        # A Hash is a data type description only when it has :type
        # and no :name; with :name it is a field description.
        DataType.resolve(arg) if !arg[:name] && arg[:type]
      end
    end

    def default_field_name
      "item"
    end

    # Unwraps `{field: ...}` and builds a Field from a Hash
    # description; any other object is returned as is.
    def resolve_field(arg)
      arg = arg[:field] if arg.is_a?(Hash) && arg.key?(:field)
      return arg unless arg.is_a?(Hash)

      Field.new(arg)
    end
  end
end
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow/block-closable" + +module Arrow + class Loader < GObjectIntrospection::Loader + class << self + def load + super("Arrow", Arrow) + end + end + + private + def post_load(repository, namespace) + require_libraries + require_extension_library + gc_guard + end + + def require_libraries + require "arrow/column-containable" + require "arrow/field-containable" + require "arrow/generic-filterable" + require "arrow/generic-takeable" + require "arrow/record-containable" + require "arrow/symbol-values-appendable" + + require "arrow/aggregate-node-options" + require "arrow/aggregation" + require "arrow/array" + require "arrow/array-builder" + require "arrow/bigdecimal-extension" + require "arrow/binary-dictionary-array-builder" + require "arrow/buffer" + require "arrow/chunked-array" + require "arrow/column" + require "arrow/compression-type" + require "arrow/csv-loader" + require "arrow/csv-read-options" + require "arrow/data-type" + require "arrow/date32-array" + require "arrow/date32-array-builder" + require "arrow/date64-array" + require "arrow/date64-array-builder" + require "arrow/datum" + require "arrow/decimal128" + require "arrow/decimal128-array" + require "arrow/decimal128-array-builder" + require "arrow/decimal128-data-type" + require "arrow/decimal256" + require "arrow/decimal256-array" + require "arrow/decimal256-array-builder" + require "arrow/decimal256-data-type" + require "arrow/dense-union-data-type" + require "arrow/dictionary-array" + require "arrow/dictionary-data-type" + require "arrow/equal-options" + require "arrow/expression" 
+ require "arrow/field" + require "arrow/file-output-stream" + require "arrow/file-system" + require "arrow/fixed-size-binary-array" + require "arrow/fixed-size-binary-array-builder" + require "arrow/group" + require "arrow/list-array-builder" + require "arrow/list-data-type" + require "arrow/map-array" + require "arrow/map-array-builder" + require "arrow/map-data-type" + require "arrow/null-array" + require "arrow/null-array-builder" + require "arrow/path-extension" + require "arrow/record" + require "arrow/record-batch" + require "arrow/record-batch-builder" + require "arrow/record-batch-file-reader" + require "arrow/record-batch-iterator" + require "arrow/record-batch-reader" + require "arrow/record-batch-stream-reader" + require "arrow/rolling-window" + require "arrow/scalar" + require "arrow/schema" + require "arrow/slicer" + require "arrow/sort-key" + require "arrow/sort-options" + require "arrow/source-node-options" + require "arrow/sparse-union-data-type" + require "arrow/string-dictionary-array-builder" + require "arrow/struct-array" + require "arrow/struct-array-builder" + require "arrow/struct-data-type" + require "arrow/table" + require "arrow/table-concatenate-options" + require "arrow/table-formatter" + require "arrow/table-list-formatter" + require "arrow/table-table-formatter" + require "arrow/table-loader" + require "arrow/table-saver" + require "arrow/tensor" + require "arrow/time" + require "arrow/time32-array" + require "arrow/time32-array-builder" + require "arrow/time32-data-type" + require "arrow/time64-array" + require "arrow/time64-array-builder" + require "arrow/time64-data-type" + require "arrow/timestamp-array" + require "arrow/timestamp-array-builder" + require "arrow/timestamp-data-type" + require "arrow/writable" + end + + def require_extension_library + require "arrow.so" + end + + def gc_guard + require "arrow/constructor-arguments-gc-guardable" + + [ + @base_module::BinaryScalar, + @base_module::Buffer, + 
@base_module::DenseUnionScalar, + @base_module::FixedSizeBinaryScalar, + @base_module::LargeBinaryScalar, + @base_module::LargeListScalar, + @base_module::LargeStringScalar, + @base_module::ListScalar, + @base_module::MapScalar, + @base_module::SparseUnionScalar, + @base_module::StringScalar, + @base_module::StructScalar, + ].each do |klass| + klass.prepend(ConstructorArgumentsGCGuardable) + end + end + + def load_object_info(info) + super + + klass = @base_module.const_get(rubyish_class_name(info)) + if klass.method_defined?(:close) + klass.extend(BlockClosable) + end + end + + def load_method_info(info, klass, method_name) + case klass.name + when /Array\z/ + case method_name + when "values" + method_name = "values_raw" + end + end + + case klass.name + when /Builder\z/ + case method_name + when "append" + return + else + super + end + when "Arrow::StringArray" + case method_name + when "get_value" + method_name = "get_raw_value" + when "get_string" + method_name = "get_value" + end + super(info, klass, method_name) + when "Arrow::Date32Array", + "Arrow::Date64Array", + "Arrow::Decimal128Array", + "Arrow::Decimal256Array", + "Arrow::Time32Array", + "Arrow::Time64Array", + "Arrow::TimestampArray" + case method_name + when "get_value" + method_name = "get_raw_value" + end + super(info, klass, method_name) + when "Arrow::Decimal128", "Arrow::Decimal256" + case method_name + when "copy" + method_name = "dup" + end + super(info, klass, method_name) + when "Arrow::BooleanScalar" + case method_name + when "value?" 
module Arrow
  class MapArrayBuilder
    # Creates a builder for `data_type` and returns whatever its
    # `#build` returns for `values`.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts a new map record. The keys and items must then be
    #   appended through {#key_builder} and {#item_builder}.
    #
    # @overload append_value(value)
    #
    #   Appends a complete map record.
    #
    #   @param value [nil, #each] The map record.
    #
    #     `nil` appends a null record.
    #
    #     An object that has `#each` must yield key and item pairs.
    #
    # @raise [ArgumentError] If more than one argument is given or
    #   the argument is neither `nil` nor an object with `#each`.
    #
    # @since 6.0.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      value = args[0]
      case value
      when nil
        append_null
      else
        unless value.respond_to?(:each)
          message = "map value must be nil, Hash or Object that has #each: #{value.inspect}"
          raise ArgumentError, message
        end
        append_value_raw
        # Both child builders are looked up once and then reused.
        @key_builder ||= key_builder
        @item_builder ||= item_builder
        if value.is_a?(Hash)
          keys = value.keys
          items = value.values
        else
          keys = []
          items = []
          value.each do |key, item|
            keys << key
            items << item
          end
        end
        @key_builder.append(*keys)
        @item_builder.append(*items)
      end
    end

    alias_method :append_values_raw, :append_values

    # Appends many records at once.
    #
    # When the first element of `values` is an `Integer` the call is
    # handed to the raw `append_values` unchanged. Otherwise each
    # element is appended with {#append_value}; when `is_valids` is
    # given it drives the iteration and a falsy flag appends a null.
    def append_values(values, is_valids=nil)
      return append_values_raw(values, is_valids) if values[0].is_a?(Integer)

      if is_valids
        is_valids.each_with_index do |is_valid, i|
          is_valid ? append_value(values[i]) : append_null
        end
      else
        values.each { |record| append_value(record) }
      end
    end
  end
end
module Arrow
  class MapDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::MapDataType}.
    #
    # @overload initialize(key, item)
    #
    #   @param key [Arrow::DataType, Hash, String, Symbol]
    #     The key data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param item [Arrow::DataType, Hash, String, Symbol]
    #     The item data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a map data type for `{0 => "Hello", 1 => "World"}`
    #     key = :int8
    #     item = :string
    #     Arrow::MapDataType.new(key, item)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the map data
    #     type. It must have `:key` and `:item` values.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :key The key data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :item The item data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a map data type for `{0 => "Hello", 1 => "World"}`
    #     Arrow::MapDataType.new(key: :int8, item: :string)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        key = description[:key]
        item = description[:item]
      when 2
        key, item = args
      else
        # Same wording as Ruby's own arity error (no comma after
        # "given").
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Both types go through DataType.resolve before reaching the
      # raw constructor.
      key = DataType.resolve(key)
      item = DataType.resolve(item)
      initialize_raw(key, item)
    end
  end
end
module Arrow
  class NullArrayBuilder
    # Narrows the inherited `buildable?`: a single `Integer` argument
    # is never treated as buildable.
    #
    # @param args [::Array] The arguments to check.
    # @return The inherited result when it is falsy, otherwise
    #   `false` for exactly one `Integer` argument and `true` for
    #   anything else.
    def self.buildable?(args)
      inherited = super
      return inherited unless inherited

      !(args.size == 1 && args[0].is_a?(Integer))
    end
  end
end
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class PathExtension + def initialize(path) + @path = path + end + + def extract + basename = ::File.basename(@path) + components = basename.split(".") + return {} if components.size < 2 + + extension = components.last.downcase + if components.size > 2 + compression = CompressionType.resolve_extension(extension) + if compression + { + format: components[-2].downcase, + compression: compression, + } + else + {format: extension} + end + else + {format: extension} + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb b/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb new file mode 100644 index 000000000..41d331fb3 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+module Arrow
+  # Converts a "raw table" into a schema, a list of column values and a
+  # row count. The raw table is either an ::Array whose first element is a
+  # Column, or an object whose #each yields (name, array) pairs.
+  class RawTableConverter
+    attr_reader :n_rows
+    attr_reader :schema
+    attr_reader :values
+    # @param raw_table The raw table to convert. The conversion runs
+    #   immediately; results are available through the readers above.
+    def initialize(raw_table)
+      @raw_table = raw_table
+      convert
+    end
+
+    private
+    # Fills @schema, @values and @n_rows from @raw_table.
+    def convert
+      if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
+        # Columns already carry their field and data.
+        fields = @raw_table.collect(&:field)
+        @schema = Schema.new(fields)
+        @values = @raw_table.collect(&:data)
+      else
+        fields = []
+        @values = []
+        @raw_table.each do |name, array|
+          # Ruby ::Array values go through ArrayBuilder.build first; other
+          # values are used as given.
+          array = ArrayBuilder.build(array) if array.is_a?(::Array)
+          fields << Field.new(name.to_s, array.value_data_type)
+          @values << array
+        end
+        @schema = Schema.new(fields)
+      end
+      # An empty raw table has no first column to measure; report zero
+      # rows instead of calling #length on nil.
+      @n_rows = @values.empty? ? 0 : @values[0].length
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb
new file mode 100644
index 000000000..dc20312f2
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +module Arrow + class RecordBatchBuilder + class << self + # @since 0.12.0 + def build(schema, data) + builder = new(schema) + builder.append(data) + builder.flush + end + end + + alias_method :initialize_raw, :initialize + private :initialize_raw + def initialize(schema) + unless schema.is_a?(Schema) + schema = Schema.new(schema) + end + initialize_raw(schema) + @name_to_index = {} + schema.fields.each_with_index do |field, i| + @name_to_index[field.name] = i + end + end + + # @since 0.12.0 + def [](name_or_index) + case name_or_index + when String, Symbol + name = name_or_index + self[resolve_name(name)] + else + index = name_or_index + column_builders[index] + end + end + + # @since 0.12.0 + def append(*values) + values.each do |value| + case value + when Hash + append_columns(value) + else + append_records(value) + end + end + end + + # @since 0.12.0 + def append_records(records) + n = n_columns + columns = n.times.collect do + [] + end + records.each_with_index do |record, nth_record| + case record + when nil + when Hash + record.each do |name, value| + nth_column = resolve_name(name) + next if nth_column.nil? 
+ columns[nth_column] << value + end + else + record.each_with_index do |value, nth_column| + columns[nth_column] << value + end + end + columns.each do |column| + column << nil if column.size != (nth_record + 1) + end + end + columns.each_with_index do |column, i| + self[i].append(*column) + end + end + + # @since 0.12.0 + def append_columns(columns) + columns.each do |name, values| + self[name].append(*values) + end + end + + # @since 0.13.0 + def column_builders + @column_builders ||= n_columns.times.collect do |i| + get_column_builder(i) + end + end + + private + def resolve_name(name) + @name_to_index[name.to_s] + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb new file mode 100644 index 000000000..86a757e32 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+module Arrow
+  class RecordBatchFileReader
+    include Enumerable
+
+    # Yields every record batch, from index 0 to n_record_batches - 1.
+    #
+    # @yieldparam record_batch The value returned by get_record_batch(i).
+    # @return [Enumerator] when no block is given; its size is
+    #   n_record_batches.
+    def each
+      # Without this guard a block-less call would reach `yield` and
+      # raise LocalJumpError.
+      return to_enum(__method__) {n_record_batches} unless block_given?
+
+      n_record_batches.times do |i|
+        yield(get_record_batch(i))
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb
new file mode 100644
index 000000000..4b828c6dc
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchIterator
+    alias_method :to_a, :to_list
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb
new file mode 100644
index 000000000..e030e4f3b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class RecordBatchReader + class << self + # @api private + def try_convert(value) + case value + when ::Array + return nil if value.empty? + if value.all? {|v| v.is_a?(RecordBatch)} + new(value) + else + nil + end + when RecordBatch + new([value]) + when Table + TableBatchReader.new(value) + else + nil + end + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb new file mode 100644 index 000000000..fa15c8000 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+module Arrow
+  class RecordBatchStreamReader
+    include Enumerable
+
+    # Yields record batches obtained from next_record_batch until it
+    # returns nil.
+    #
+    # @yieldparam record_batch The non-nil value returned by
+    #   next_record_batch.
+    # @return [Enumerator] when no block is given.
+    def each
+      # Without this guard a block-less call would reach `yield` and
+      # raise LocalJumpError.
+      return to_enum(__method__) unless block_given?
+
+      loop do
+        record_batch = next_record_batch
+        break if record_batch.nil?
+        yield(record_batch)
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb
new file mode 100644
index 000000000..c5aaf876b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +require "arrow/raw-table-converter" + +module Arrow + class RecordBatch + include ColumnContainable + include RecordContainable + include Enumerable + + class << self + def new(*args) + n_args = args.size + case n_args + when 1 + raw_table_converter = RawTableConverter.new(args[0]) + n_rows = raw_table_converter.n_rows + schema = raw_table_converter.schema + values = raw_table_converter.values + super(schema, n_rows, values) + when 2 + schema, data = args + RecordBatchBuilder.build(schema, data) + when 3 + super + else + message = "wrong number of arguments (given #{n_args}, expected 1..3)" + raise ArgumentError, message + end + end + end + + alias_method :each, :each_record + + alias_method :size, :n_rows + alias_method :length, :n_rows + + # Converts the record batch to {Arrow::Table}. + # + # @return [Arrow::Table] + # + # @since 0.12.0 + def to_table + Table.new(schema, [self]) + end + + def respond_to_missing?(name, include_private) + return true if find_column(name) + super + end + + def method_missing(name, *args, &block) + if args.empty? + column = find_column(name) + return column if column + end + super + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb new file mode 100644 index 000000000..20c9ac2f5 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + module RecordContainable + def each_record(reuse_record: false) + unless block_given? + return to_enum(__method__, reuse_record: reuse_record) + end + + if reuse_record + record = Record.new(self, nil) + n_rows.times do |i| + record.index = i + yield(record) + end + else + n_rows.times do |i| + yield(Record.new(self, i)) + end + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record.rb b/src/arrow/ruby/red-arrow/lib/arrow/record.rb new file mode 100644 index 000000000..6f83dded0 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/record.rb @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module Arrow + class Record + attr_reader :container + attr_accessor :index + def initialize(container, index) + @container = container + @index = index + end + + def [](column_name_or_column_index) + column = @container.find_column(column_name_or_column_index) + return nil if column.nil? + column[@index] + end + + def to_a + @container.columns.collect do |column| + column[@index] + end + end + + def to_h + attributes = {} + @container.columns.each do |column| + attributes[column.name] = column[@index] + end + attributes + end + + def respond_to_missing?(name, include_private) + return true if @container.find_column(name) + super + end + + def method_missing(name, *args, &block) + if args.empty? + column = @container.find_column(name) + return column[@index] if column + end + super + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb b/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb new file mode 100644 index 000000000..1db03bb23 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + # Experimental + # + # TODO: Almost codes should be implemented in Apache Arrow C++. 
+  class RollingWindow
+    # @param table The object indexed with `table[key]` in {#lag}.
+    # @param size The slice size used by {#lag}; when nil or false the
+    #   column is used without slicing.
+    def initialize(table, size)
+      @table = table
+      @size = size
+    end
+
+    # Computes, for each position, the difference between the current
+    # value and the value `diff` positions earlier.
+    #
+    # @param key The column key looked up with `@table[key]`.
+    # @param diff [Integer] The distance between the two compared values.
+    # @return The result of `ArrayBuilder.build`. The first `diff` entries
+    #   are nil, as is every entry where either operand is nil.
+    #
+    # NOTE(review): when @size is set the column is first cut with
+    # each_slice(@size), so whole slices are compared rather than single
+    # values -- confirm that this is intended.
+    def lag(key, diff: 1)
+      column = @table[key]
+      if @size
+        windows = column.each_slice(@size)
+      else
+        windows = column
+      end
+      lag_values = [nil] * diff
+      windows.each_cons(diff + 1) do |values|
+        target = values[0]
+        # Each window holds diff + 1 elements, so the current value is the
+        # last one. Reading values[1] was only correct for diff == 1.
+        current = values[-1]
+        if target.nil? or current.nil?
+          lag_values << nil
+        else
+          lag_values << current - target
+        end
+      end
+      ArrayBuilder.build(lag_values)
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb b/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb
new file mode 100644
index 000000000..b2bf1ac59
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Scalar
+    # @param other [Arrow::Scalar] The scalar to be compared.
+    # @param options [Arrow::EqualOptions, Hash] (nil)
+    #   The options to custom how to compare.
+    #
+    # @return [Boolean]
+    #   `true` if both of them have the same data, `false` otherwise.
+ # + # @since 5.0.0 + def equal_scalar?(other, options=nil) + equal_options(other, options) + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/schema.rb b/src/arrow/ruby/red-arrow/lib/arrow/schema.rb new file mode 100644 index 000000000..03354c862 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/schema.rb @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class Schema + include FieldContainable + + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::Schema}. + # + # @overload initialize(fields) + # + # @param fields [::Array<Arrow::Field, Hash>] The fields of the + # schema. You can mix {Arrow::Field} and field description in + # the fields. + # + # See {Arrow::Field.new} how to specify field description. 
+ # + # @example Create a schema with {Arrow::Field}s + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::Schema.new([visible_field]) + # + # @example Create a schema with field descriptions + # visible_field_description = { + # name: "visible", + # data_type: :boolean, + # } + # Arrow::Schema.new([visible_field_description]) + # + # @example Create a schema with {Arrow::Field}s and field descriptions + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::Schema.new(fields) + # + # @overload initialize(fields) + # + # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}] + # The pairs of field name and field data type of the schema. + # You can mix {Arrow::DataType} and data description for field + # data type. + # + # See {Arrow::DataType.new} how to specify data type description. + # + # @example Create a schema with fields + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # :count => :int32, + # :tags => { + # type: :list, + # field: { + # name: "tag", + # type: :string, + # }, + # }, + # } + # Arrow::Schema.new(fields) + def initialize(fields) + case fields + when ::Array + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + when Hash + fields = fields.collect do |name, data_type| + Field.new(name, data_type) + end + end + initialize_raw(fields) + end + + alias_method :[], :find_field + + alias_method :to_s_raw, :to_s + def to_s(show_metadata: false) + to_string_metadata(show_metadata) + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb b/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb new file mode 100644 index 000000000..6cca7f75e --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb @@ -0,0 +1,355 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class Slicer + def initialize(table) + @table = table + end + + def [](column_name) + column = @table[column_name] + return nil if column.nil? + ColumnCondition.new(column) + end + + def respond_to_missing?(name, include_private) + return true if self[name] + super + end + + def method_missing(name, *args, &block) + if args.empty? 
+ column_condition = self[name] + return column_condition if column_condition + end + super + end + + module Helper + class << self + def ensure_boolean(column) + case column.data_type + when Arrow::BooleanDataType + column.data + else + options = CastOptions.new + options.to_data_type = Arrow::BooleanDataType.new + Function.find("cast").execute([column.data], options).value + end + end + end + end + + class Condition + def evaluate + message = "Slicer::Condition must define \#evaluate: #{inspect}" + raise NotImplementedError.new(message) + end + + def &(condition) + AndCondition.new(self, condition) + end + + def |(condition) + OrCondition.new(self, condition) + end + + def ^(condition) + XorCondition.new(self, condition) + end + end + + class LogicalCondition < Condition + def initialize(condition1, condition2) + @condition1 = condition1 + @condition2 = condition2 + end + + def evaluate + function.execute([@condition1.evaluate, @condition2.evaluate]).value + end + end + + class AndCondition < LogicalCondition + private + def function + Function.find("and") + end + end + + class OrCondition < LogicalCondition + private + def function + Function.find("or") + end + end + + class XorCondition < LogicalCondition + private + def function + Function.find("xor") + end + end + + class ColumnCondition < Condition + def initialize(column) + @column = column + end + + def evaluate + Helper.ensure_boolean(@column) + end + + def !@ + NotColumnCondition.new(@column) + end + + def null? + self == nil + end + + def valid? 
+ self != nil + end + + def ==(value) + EqualCondition.new(@column, value) + end + + def !=(value) + NotEqualCondition.new(@column, value) + end + + def <(value) + LessCondition.new(@column, value) + end + + def <=(value) + LessEqualCondition.new(@column, value) + end + + def >(value) + GreaterCondition.new(@column, value) + end + + def >=(value) + GreaterEqualCondition.new(@column, value) + end + + def in?(values) + InCondition.new(@column, values) + end + + def select(&block) + SelectCondition.new(@column, block) + end + + def reject(&block) + RejectCondition.new(@column, block) + end + end + + class NotColumnCondition < Condition + def initialize(column) + @column = column + end + + def evaluate + data = Helper.ensure_boolean(@column) + Function.find("invert").execute([data]).value + end + + def !@ + ColumnCondition.new(@column) + end + end + + class EqualCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + NotEqualCondition.new(@column, @value) + end + + def evaluate + if @value.nil? + Function.find("is_null").execute([@column.data]).value + else + Function.find("equal").execute([@column.data, @value]).value + end + end + end + + class NotEqualCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + EqualCondition.new(@column, @value) + end + + def evaluate + if @value.nil? 
+ Function.find("is_valid").execute([@column.data]).value + else + Function.find("not_equal").execute([@column.data, @value]).value + end + end + end + + class LessCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + GreaterEqualCondition.new(@column, @value) + end + + def evaluate + Function.find("less").execute([@column.data, @value]).value + end + end + + class LessEqualCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + GreaterCondition.new(@column, @value) + end + + def evaluate + Function.find("less_equal").execute([@column.data, @value]).value + end + end + + class GreaterCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + LessEqualCondition.new(@column, @value) + end + + def evaluate + Function.find("greater").execute([@column.data, @value]).value + end + end + + class GreaterEqualCondition < Condition + def initialize(column, value) + @column = column + @value = value + end + + def !@ + LessCondition.new(@column, @value) + end + + def evaluate + Function.find("greater_equal").execute([@column.data, @value]).value + end + end + + class InCondition < Condition + def initialize(column, values) + @column = column + @values = values + end + + def !@ + NotInCondition.new(@column, @values) + end + + def evaluate + values = @values + values = Array.new(values) unless values.is_a?(Array) + options = SetLookupOptions.new(values) + Function.find("is_in").execute([@column.data], options).value + end + end + + class NotInCondition < Condition + def initialize(column, values) + @column = column + @values = values + end + + def !@ + InCondition.new(@column, @values) + end + + def evaluate + values = @values + values = Array.new(values) unless values.is_a?(Array) + options = SetLookupOptions.new(values) + booleans = Function.find("is_in").execute([@column.data], options).value + 
Function.find("invert").execute([booleans]).value + end + end + + class SelectCondition < Condition + def initialize(column, block) + @column = column + @block = block + end + + def !@ + RejectCondition.new(@column, @block) + end + + def evaluate + BooleanArray.new(@column.collect(&@block)) + end + end + + class RejectCondition < Condition + def initialize(column, block) + @column = column + @block = block + end + + def !@ + SelectCondition.new(@column, @block) + end + + def evaluate + raw_array = @column.collect do |value| + evaluated_value = @block.call(value) + if evaluated_value.nil? + nil + else + not evaluated_value + end + end + BooleanArray.new(raw_array) + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb b/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb new file mode 100644 index 000000000..987027256 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb @@ -0,0 +1,193 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class SortKey + class << self + # Ensure returning suitable {Arrow::SortKey}. + # + # @overload resolve(sort_key) + # + # Returns the given sort key itself. This is convenient to use + # this method as {Arrow::SortKey} converter. 
+ # + # @param sort_key [Arrow::SortKey] The sort key. + # + # @return [Arrow::SortKey] The given sort key itself. + # + # @overload resolve(name) + # + # Creates a new suitable sort key from column name with + # leading order mark. See {#initialize} for details about + # order mark. + # + # @return [Arrow::SortKey] A new suitable sort key. + # + # @overload resolve(name, order) + # + # Creates a new suitable sort key from column name without + # leading order mark and order. See {#initialize} for details. + # + # @return [Arrow::SortKey] A new suitable sort key. + # + # @since 4.0.0 + def resolve(name, order=nil) + return name if name.is_a?(self) + new(name, order) + end + + # @api private + def try_convert(value) + case value + when Symbol, String + new(value.to_s, :ascending) + else + nil + end + end + end + + alias_method :initialize_raw, :initialize + private :initialize_raw + # Creates a new {Arrow::SortKey}. + # + # @overload initialize(name) + # + # @param name [Symbol, String] The name of the sort column. + # + # If `name` is a String, the first character may be processed + # as the "leading order mark". If the first character is `"+"` + # or `"-"`, they are processed as a leading order mark. If the + # first character is processed as a leading order mark, the + # first character is removed from sort column name and + # corresponding order is used. `"+"` uses ascending order and + # `"-"` uses descending order. + # + # If `name` is not a String or `name` doesn't start with the + # leading order mark, sort column name is `name` as-is and + # ascending order is used. 
+ # + # @example String without the leading order mark + # key = Arrow::SortKey.new("count") + # key.name # => "count" + # key.order # => Arrow::SortOrder::ASCENDING + # + # @example String with the "+" leading order mark + # key = Arrow::SortKey.new("+count") + # key.name # => "count" + # key.order # => Arrow::SortOrder::ASCENDING + # + # @example String with the "-" leading order mark + # key = Arrow::SortKey.new("-count") + # key.name # => "count" + # key.order # => Arrow::SortOrder::DESCENDING + # + # @example Symbol that starts with "-" + # key = Arrow::SortKey.new(:"-count") + # key.name # => "-count" + # key.order # => Arrow::SortOrder::ASCENDING + # + # @overload initialize(name, order) + # + # @param name [Symbol, String] The name of the sort column. + # + # No leading order mark processing. The given `name` is used + # as-is. + # + # @param order [Symbol, String, Arrow::SortOrder] How to order + # by this sort key. + # + # If this is a Symbol or String, this must be `:ascending`, + # `"ascending"`, `:asc`, `"asc"`, `:descending`, + # `"descending"`, `:desc` or `"desc"`. 
+ # + # @example No leading order mark processing + # key = Arrow::SortKey.new("-count", :ascending) + # key.name # => "-count" + # key.order # => Arrow::SortOrder::ASCENDING + # + # @example Order by abbreviated name with Symbol + # key = Arrow::SortKey.new("count", :desc) + # key.name # => "count" + # key.order # => Arrow::SortOrder::DESCENDING + # + # @example Order by String + # key = Arrow::SortKey.new("count", "descending") + # key.name # => "count" + # key.order # => Arrow::SortOrder::DESCENDING + # + # @example Order by Arrow::SortOrder + # key = Arrow::SortKey.new("count", Arrow::SortOrder::DESCENDING) + # key.name # => "count" + # key.order # => Arrow::SortOrder::DESCENDING + # + # @since 4.0.0 + def initialize(name, order=nil) + name, order = normalize_name(name, order) + order = normalize_order(order) || :ascending + initialize_raw(name, order) + end + + # @return [String] The string representation of this sort key. You + # can use recreate {Arrow::SortKey} by + # `Arrow::SortKey.new(key.to_s)`. 
+ # + # @example Recreate Arrow::SortKey + # key = Arrow::SortKey.new("-count") + # key.to_s # => "-count" + # key == Arrow::SortKey.new(key.to_s) # => true + # + # @since 4.0.0 + def to_s + if order == SortOrder::ASCENDING + "+#{name}" + else + "-#{name}" + end + end + + private + def normalize_name(name, order) + case name + when Symbol + return name.to_s, order + when String + return name, order if order + if name.start_with?("-") + return name[1..-1], order || :descending + elsif name.start_with?("+") + return name[1..-1], order || :ascending + else + return name, order + end + else + return name, order + end + end + + def normalize_order(order) + case order + when :asc, "asc" + :ascending + when :desc, "desc" + :descending + else + order + end + end + end +end diff --git a/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb new file mode 100644 index 000000000..a7c2d6431 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
module Arrow
  class SortOptions
    class << self
      # @api private
      def try_convert(value)
        # Anything that is not a Symbol, String or Array isn't
        # convertible and results in nil.
        case value
        when Symbol, String then new(value)
        when ::Array then new(*value)
        end
      end
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw
    # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
    #   sort keys to be used. See {Arrow::SortKey.resolve} how to
    #   resolve each sort key in `sort_keys`.
    #
    #   You can add more sort keys by {#add_sort_key} later.
    #
    # @example No initial sort keys
    #   options = Arrow::SortOptions.new
    #   options.sort_keys # => []
    #
    # @example String sort keys
    #   options = Arrow::SortOptions.new("count", "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @example Symbol sort keys
    #   options = Arrow::SortOptions.new(:count, :age)
    #   options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
    #
    # @example Mixed sort keys
    #   options = Arrow::SortOptions.new(:count, "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @since 4.0.0
    def initialize(*sort_keys)
      initialize_raw
      sort_keys.each { |sort_key| add_sort_key(sort_key) }
    end

    # @api private
    alias_method :add_sort_key_raw, :add_sort_key
    # Add a sort key.
    #
    # @return [void]
    #
    # @overload add_sort_key(key)
    #
    #   @param key [Arrow::SortKey] The sort key to be added.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key(Arrow::SortKey.new(:price, :descending))
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name)
    #
    #   @param name [Symbol, String] The sort key name to be
    #     added. See also {Arrow::SortKey#initialize} for the leading
    #     order mark for String name.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("-price")
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name, order)
    #
    #   @param name [Symbol, String] The sort key name.
    #
    #   @param order [Symbol, String, Arrow::SortOrder] The sort
    #     order. See {Arrow::SortKey#initialize} for details.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("price", :desc)
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @since 4.0.0
    def add_sort_key(name, order=nil)
      sort_key = SortKey.resolve(name, order)
      add_sort_key_raw(sort_key)
    end
  end
end
module Arrow
  class SourceNodeOptions
    class << self
      # Wraps a record batch reader, a record batch or a table into a
      # new {Arrow::SourceNodeOptions}. Any other value isn't
      # convertible and results in `nil`.
      #
      # @api private
      def try_convert(value)
        case value
        when RecordBatchReader, RecordBatch, Table then new(value)
        end
      end
    end
  end
end
module Arrow
  class SparseUnionDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::SparseUnionDataType}.
    #
    # @overload initialize(fields, type_codes)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     sparse union data type. You can mix {Arrow::Field} and field
    #     description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @param type_codes [::Array<Integer>] The IDs that indicate
    #     corresponding fields.
    #
    #   @example Create a sparse union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::SparseUnionDataType.new(fields, [2, 9])
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the sparse union
    #     data type. It must have `:fields` and `:type_codes` values.
    #
    #   @option description [::Array<Arrow::Field, Hash>] :fields The
    #     fields of the sparse union data type. You can mix
    #     {Arrow::Field} and field description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @option description [::Array<Integer>] :type_codes The IDs
    #     that indicate corresponding fields.
    #
    #   @example Create a sparse union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::SparseUnionDataType.new(fields: fields,
    #                                    type_codes: [2, 9])
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        fields = description[:fields]
        type_codes = description[:type_codes]
      when 2
        fields, type_codes = args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Field descriptions are converted; Arrow::Field objects are
      # used as-is.
      fields = fields.collect do |field|
        field.is_a?(Field) ? field : Field.new(field)
      end
      initialize_raw(fields, type_codes)
    end
  end
end
module Arrow
  class StringDictionaryArrayBuilder
    include SymbolValuesAppendable

    # Hook used by {SymbolValuesAppendable#append_values}: the
    # dictionary values are collected by a string array builder.
    private def create_values_array_builder
      StringArrayBuilder.new
    end
  end
end
module Arrow
  class StructArrayBuilder
    class << self
      # Builds an array of `data_type` from `values` with a new builder.
      def build(data_type, values)
        builder = new(data_type)
        builder.build(values)
      end
    end

    # Shortcut of {#find_field_builder}.
    def [](index_or_name)
      find_field_builder(index_or_name)
    end

    # @param index_or_name [String, Symbol, Integer] The field name or
    #   the field index.
    #
    # @return The builder of the field found by name for String and
    #   Symbol, by index otherwise.
    def find_field_builder(index_or_name)
      case index_or_name
      when String, Symbol
        name = index_or_name
        cached_name_to_builder[name.to_s]
      else
        index = index_or_name
        cached_field_builders[index]
      end
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a struct record. You need to append values of
    #   fields.
    #
    # @overload append_value(value)
    #
    #   Appends a struct record including values of fields.
    #
    #   @param value [nil, ::Array, Hash] The struct record value.
    #
    #     If this is `nil`, the struct record is null.
    #
    #     If this is `Array` or `Hash`, they are values of fields.
    #     Fields that aren't included in `Hash` are null.
    #
    # @raise [ArgumentError] If `value` is not `nil`, `Array` or
    #   `Hash`, if a `Hash` key doesn't match any field name (or
    #   matches the same field twice), or if two or more arguments are
    #   given.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size

      case n_args
      when 0
        append_value_raw
      when 1
        value = args[0]
        case value
        when nil
          append_null
        when ::Array
          append_value_raw
          cached_field_builders.zip(value) do |builder, sub_value|
            builder.append(sub_value)
          end
        when Hash
          append_hash_value(value)
        else
          message =
            "struct value must be nil, Array or Hash: #{value.inspect}"
          raise ArgumentError, message
        end
      else
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
    end

    # Appends `values` one by one. If `is_valids` is given, the
    # `i`-th record is null when `is_valids[i]` is falsy.
    def append_values(values, is_valids=nil)
      if is_valids
        is_valids.each_with_index do |is_valid, i|
          if is_valid
            append_value(values[i])
          else
            append_null
          end
        end
      else
        values.each do |value|
          append_value(value)
        end
      end
    end

    alias_method :append_null_raw, :append_null
    def append_null
      append_null_raw
    end

    # @since 0.12.0
    def append(*values)
      if values.empty?
        # For backward compatibility
        append_value_raw
      else
        super
      end
    end

    private
    # Appends a struct record from field name => field value pairs.
    #
    # All field builders are resolved before the record is started so
    # that an unknown field name is reported by ArgumentError without
    # leaving a partially appended record behind.
    def append_hash_value(value)
      unused_name_to_builder = cached_name_to_builder.dup
      builders_and_values = value.collect do |name, sub_value|
        builder = unused_name_to_builder.delete(name.to_s)
        if builder.nil?
          message =
            "unknown or duplicated struct field name: " +
            "#{name.inspect}: #{value.inspect}"
          raise ArgumentError, message
        end
        [builder, sub_value]
      end
      append_value_raw
      builders_and_values.each do |builder, sub_value|
        builder.append(sub_value)
      end
      # Fields that aren't in the given Hash are null.
      unused_name_to_builder.each_value(&:append_null)
    end

    def cached_field_builders
      @field_builders ||= field_builders
    end

    def build_name_to_builder
      name_to_builder = {}
      builders = cached_field_builders
      value_data_type.fields.each_with_index do |field, i|
        name_to_builder[field.name] = builders[i]
      end
      name_to_builder
    end

    def cached_name_to_builder
      @name_to_builder ||= build_name_to_builder
    end
  end
end
module Arrow
  class StructArray
    # @param i [Integer]
    #   The index of the value to be gotten. You must specify the value index.
    #
    #   You can use {Arrow::Array#[]} for convenient value access.
    #
    # @return [Hash] The `i`-th struct. Keys are field names and
    #   values are the `i`-th values of the corresponding fields.
    def get_value(i)
      value = {}
      value_data_type.fields.zip(fields) do |field, field_array|
        value[field.name] = field_array[i]
      end
      value
    end

    # @overload find_field(index)
    #   @param index [Integer] The index of the field to be found.
    #   @return [Arrow::Array, nil]
    #      The `index`-th field or `nil` for out of range.
    #
    # @overload find_field(name)
    #   @param name [String, Symbol] The name of the field to be found.
    #   @return [Arrow::Array, nil]
    #      The field that has `name` or `nil` for nonexistent name.
    def find_field(index_or_name)
      case index_or_name
      when String, Symbol
        name = index_or_name
        # The name => field mapping is built once on the first lookup
        # by name.
        (@name_to_field ||= build_name_to_field)[name.to_s]
      else
        index = index_or_name
        fields[index]
      end
    end

    alias_method :fields_raw, :fields
    # Same as the raw `fields` but the result is cached.
    def fields
      @fields ||= fields_raw
    end

    private
    def build_name_to_field
      name_to_field = {}
      value_data_type.fields.zip(fields) do |field, field_array|
        name_to_field[field.name] = field_array
      end
      name_to_field
    end
  end
end
module Arrow
  class StructDataType
    include FieldContainable

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::StructDataType}.
    #
    # @overload initialize(fields)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     struct data type. You can also specify field description as
    #     a field. You can mix {Arrow::Field} and field description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a struct data type with {Arrow::Field}s
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     count_field = Arrow::Field.new("count", :int32)
    #     Arrow::StructDataType.new([visible_field, count_field])
    #
    #   @example Create a struct data type with field descriptions
    #     field_descriptions = [
    #       {name: "visible", type: :boolean},
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(field_descriptions)
    #
    #   @example Create a struct data type with {Arrow::Field} and field description
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(fields)
    #
    # @overload initialize(fields)
    #
    #   @param fields [Hash{String, Symbol => Arrow::DataType, Hash}]
    #     The pairs of field name and field data type of the struct
    #     data type. You can also specify data type description by
    #     `Hash`. You can mix {Arrow::DataType} and data type description.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a struct data type with {Arrow::DataType}s
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => Arrow::Int32DataType.new,
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    #   @example Create a struct data type with data type descriptions
    #     fields = {
    #       "visible" => :boolean,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    #   @example Create a struct data type with {Arrow::DataType} and data type description
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the struct data
    #     type. It must have `:fields` value.
    #
    #   @option description
    #     [::Array<Arrow::Field, Hash>,
    #      Hash{String, Symbol => Arrow::DataType, Hash, String, Symbol}]
    #     :fields The fields of the struct data type.
    #
    #   @example Create a struct data type with {Arrow::Field} and field description
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(fields: fields)
    #
    #   @example Create a struct data type with {Arrow::DataType} and data type description
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields: fields)
    def initialize(fields)
      # A Hash that has the :fields key is a description: the real
      # fields are in its :fields value.
      fields = fields[:fields] if fields.is_a?(Hash) && fields.key?(:fields)
      resolved_fields =
        if fields.is_a?(Hash)
          # Pairs of field name and data type (or data type description).
          fields.collect { |name, data_type| Field.new(name, data_type) }
        else
          # Arrow::Field objects are used as-is, descriptions are converted.
          fields.collect do |field|
            field.is_a?(Field) ? field : Field.new(field)
          end
        end
      initialize_raw(resolved_fields)
    end

    alias_method :[], :find_field
  end
end
module Arrow
  module SymbolValuesAppendable
    # Appends `values` after converting Symbol values to String.
    #
    # The values are first collected by the builder returned by
    # `create_values_array_builder` (provided by the including class)
    # and the finished array is appended by `append_array`.
    #
    # @param values [::Array] The values to be appended. Symbols are
    #   converted by `to_s`, other values are passed as-is.
    #
    # @param is_valids [::Array, nil] Passed to `append_values` of the
    #   values array builder as-is.
    def append_values(values, is_valids=nil)
      values_builder = create_values_array_builder
      normalized_values = values.map do |value|
        value.is_a?(Symbol) ? value.to_s : value
      end
      values_builder.append_values(normalized_values, is_valids)
      append_array(values_builder.finish)
    end
  end
end
module Arrow
  class TableConcatenateOptions
    class << self
      # Converts a Hash of option name => option value pairs into a
      # new {Arrow::TableConcatenateOptions} by calling the `#{name}=`
      # writer for each pair. Any other value isn't convertible and
      # results in `nil`.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |name, option_value|
            # Each writer receives only its own value, not the whole
            # Hash.
            options.public_send("#{name}=", option_value)
          end
          options
        else
          nil
        end
      end
    end
  end
end
module Arrow
  # TODO: Almost codes should be implemented in Apache Arrow C++.
  class TableFormatter
    # Formats the values of one column. It keeps the head and tail
    # values that are shown so that widths can be computed from them.
    #
    # @private
    class ColumnFormatter
      attr_reader :column
      attr_reader :head_values
      attr_reader :tail_values
      attr_reader :sample_values
      def initialize(column, head_values, tail_values)
        @column = column
        @head_values = head_values
        @tail_values = tail_values
        @sample_values = head_values + tail_values
        # Cache of struct field => width of its formatted values.
        @field_value_widths = {}
      end

      def data_type
        @data_type ||= @column.data_type
      end

      def name
        @name ||= @column.name
      end

      # The column name padded to the width computed from the data
      # type and the sample values.
      def aligned_name
        @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
      end

      FLOAT_N_DIGITS = 10
      FORMATTED_NULL = "(null)"

      # Formats `value` in at least `width` characters. Numbers and
      # null are right-aligned, structs and other values are
      # left-aligned and times are formatted by `iso8601` without
      # padding.
      def format_value(value, width=0)
        case value
        when ::Time
          value.iso8601
        when Float
          float_width = [width, FLOAT_N_DIGITS].max
          "%*f" % [float_width, value]
        when Integer
          "%*d" % [width, value]
        when Hash
          # Struct value: "{name: value, ...}" in the field order of
          # the data type.
          pairs = data_type.fields.collect do |field|
            field_name = field.name
            field_value_width = compute_field_value_width(field, @sample_values)
            formatted_name = format_value(field_name, 0)
            formatted_value = format_value(value[field_name], field_value_width)
            "#{formatted_name}: #{formatted_value}"
          end
          "%-*s" % [width, "{#{pairs.join(", ")}}"]
        when nil
          "%*s" % [width, FORMATTED_NULL]
        else
          "%-*s" % [width, value.to_s]
        end
      end

      private
      # Width of the formatted values of a struct field, computed once
      # per field from the field values of the given struct samples.
      def compute_field_value_width(field, sample_values)
        return @field_value_widths[field] if @field_value_widths.key?(field)

        field_name = field.name
        field_sample_values = sample_values.collect do |sample_value|
          (sample_value || {})[field_name]
        end
        # Aligning an empty name gives padding of the value width only.
        padding = format_aligned_name("", field.data_type, field_sample_values)
        @field_value_widths[field] = padding.size
      end

      def format_aligned_name(name, data_type, sample_values)
        case data_type
        when TimestampDataType
          "%*s" % [::Time.now.iso8601.size, name]
        when IntegerDataType
          present_values = sample_values.compact
          have_null = present_values.size != sample_values.size
          have_negative = present_values.any?(&:negative?)
          max_value = present_values.collect(&:abs).max
          width =
            if max_value.nil?
              0
            elsif max_value.zero?
              1
            else
              (Math.log10(max_value) + 1).truncate
            end
          width += 1 if have_negative # Need "-"
          width = [width, FORMATTED_NULL.size].max if have_null
          "%*s" % [width, name]
        when FloatDataType, DoubleDataType
          "%*s" % [FLOAT_N_DIGITS, name]
        when StructDataType
          field_widths = data_type.fields.collect do |field|
            field_value_width = compute_field_value_width(field, sample_values)
            field.name.size + ": ".size + field_value_width
          end
          width = "{}".size + field_widths.sum
          unless field_widths.empty?
            # ", " between fields
            width += ", ".size * (field_widths.size - 1)
          end
          "%*s" % [width, name]
        else
          name
        end
      end
    end

    def initialize(table, options={})
      @table = table
      @options = options
    end

    # Formats the table as text: the header, the first `border` rows
    # and the last `border` rows with an ellipsis between them when
    # rows are omitted. `border` is `@options[:border]` or 10.
    #
    # `format_header`, `format_rows` and `format_ellipsis` are called
    # to build each part; they aren't defined in this class.
    #
    # @return [String] The formatted text.
    def format
      text = ""
      n_rows = @table.n_rows
      border = @options[:border] || 10

      head_limit = [border, n_rows].min

      # The tail never overlaps with the head.
      tail_start = [border, n_rows - border].max
      tail_limit = n_rows - tail_start

      column_formatters = @table.columns.collect do |column|
        head_values = column.each.take(head_limit)
        tail_values =
          if tail_limit > 0
            column.reverse_each.take(tail_limit).reverse
          else
            []
          end
        ColumnFormatter.new(column, head_values, tail_values)
      end

      format_header(text, column_formatters)
      return text if n_rows.zero?

      n_digits = (Math.log10(n_rows) + 1).truncate
      head_rows = column_formatters.collect(&:head_values).transpose
      format_rows(text, column_formatters, head_rows, n_digits, 0)
      return text if n_rows <= border

      format_ellipsis(text) if head_limit != tail_start

      tail_rows = column_formatters.collect(&:tail_values).transpose
      format_rows(text, column_formatters, tail_rows, n_digits, tail_start)

      text
    end
  end
end
module Arrow
  # TODO: Almost codes should be implemented in Apache Arrow C++.
  class TableListFormatter < TableFormatter
    private
    # The list format has no header.
    def format_header(text, columns)
    end

    # Appends one section per row: a separator line that includes the
    # row number followed by one "name: value" line per column.
    def format_rows(text, column_formatters, rows, n_digits, start_offset)
      separator = "=" * 20
      rows.each_with_index do |row, nth_row|
        text << "#{separator} #{start_offset + nth_row} #{separator}\n"
        row.zip(column_formatters) do |column_value, column_formatter|
          formatted_name = column_formatter.name
          formatted_value = column_formatter.format_value(column_value)
          text << "#{formatted_name}: #{formatted_value}\n"
        end
      end
    end

    def format_ellipsis(text)
      text << "...\n"
    end
  end
end
require "uri"

module Arrow
  # Loads an {Arrow::Table} from a path-like object, a `URI` or an
  # {Arrow::Buffer}.
  #
  # The source kind selects a private `load_from_*` method and the
  # `:format` option selects a private `load_as_*` method, so new sources
  # and formats can be added just by defining methods with those prefixes.
  class TableLoader
    class << self
      # Shortcut for `new(input, options).load`.
      def load(input, options={})
        new(input, options).load
      end
    end

    # @param input [#to_path, String, URI, Arrow::Buffer] The load source.
    #   Objects responding to `to_path` are converted to their path.
    # @param options [Hash] Load options. `:format` and `:compression` are
    #   filled from the input's extension when they aren't given.
    def initialize(input, options={})
      @input = input.respond_to?(:to_path) ? input.to_path : input
      @options = options
      fill_options
    end

    # Dispatches to the `load_from_*` method matching the input kind.
    #
    # @raise [ArgumentError] If no loader exists for the input kind.
    def load
      loader_name =
        if @input.is_a?(URI)
          "load_from_uri"
        elsif @input.is_a?(String) and ::File.directory?(@input)
          "load_from_directory"
        else
          "load_from_file"
        end
      return __send__(loader_name) if respond_to?(loader_name, true)

      sources = method_suffixes(/\Aload_from_/)
      raise ArgumentError,
            "Arrow::Table load source must be one of " +
            "[#{sources.join(", ")}]: #{@input.inspect}"
    end

    private
    # Collects the part after the prefix for every public or private method
    # whose name matches `prefix_pattern`.
    def method_suffixes(prefix_pattern)
      candidates = methods(true) | private_methods(true)
      suffixes = candidates.collect do |name|
        match_data = prefix_pattern.match(name.to_s)
        match_data && match_data.post_match
      end
      suffixes.compact
    end

    # Stores `input` on `table` as `@input` and returns the table.
    def remember_input(table, input)
      table.instance_variable_set(:@input, input)
      table
    end

    # Dispatches to the `load_as_*` method matching `@options[:format]`.
    #
    # @raise [ArgumentError] If the format has no loader. The deprecated
    #   format names aren't advertised in the message.
    def load_from_file
      format = @options[:format]
      loader_name = "load_as_#{format}"
      unless respond_to?(loader_name, true)
        formats = method_suffixes(/\Aload_as_/) - ["batch", "stream"]
        raise ArgumentError,
              "Arrow::Table load format must be one of " +
              "[#{formats.join(", ")}]: #{format.inspect}"
      end
      if method(loader_name).arity.zero?
        __send__(loader_name)
      else
        # For backward compatibility.
        __send__(loader_name, @input)
      end
    end

    # Fills `:format` and `:compression` from the input's extension.
    # Works on a copy so that the caller's options hash is never changed.
    def fill_options
      return if @options[:format] and @options.key?(:compression)

      info =
        case @input
        when Buffer
          {}
        when URI
          PathExtension.new(@input.path).extract
        else
          PathExtension.new(@input).extract
        end
      detected_format = info[:format]
      @options = @options.dup
      @options[:format] ||= detected_format ? detected_format.to_sym : :arrow
      @options[:compression] = info[:compression] unless @options.key?(:compression)
    end

    def open_input_stream
      return BufferInputStream.new(@input) if @input.is_a?(Buffer)

      MemoryMappedInputStream.new(@input)
    end

    # Reads the schema first and then every record batch from `reader`,
    # and builds a table from them.
    def load_raw(input, reader)
      schema = reader.schema
      record_batches = []
      reader.each {|record_batch| record_batches << record_batch}
      remember_input(Table.new(schema, record_batches), input)
    end

    # Tries the file format first and then the streaming format. A fresh
    # input stream is opened for each attempt. The error of the last
    # failed attempt is raised when no reader accepts the input.
    def load_as_arrow
      input = reader = last_error = nil
      [RecordBatchFileReader, RecordBatchStreamReader].each do |reader_class|
        input = open_input_stream
        begin
          reader = reader_class.new(input)
          break
        rescue Arrow::Error => error
          last_error = error
        end
      end
      raise last_error if reader.nil?
      load_raw(input, reader)
    end

    # @since 1.0.0
    def load_as_arrow_file
      input = open_input_stream
      load_raw(input, RecordBatchFileReader.new(input))
    end

    # @deprecated Use `format: :arrow_file` instead.
    def load_as_batch
      load_as_arrow_file
    end

    # @since 1.0.0
    def load_as_arrow_streaming
      input = open_input_stream
      load_raw(input, RecordBatchStreamReader.new(input))
    end

    # @deprecated Use `format: :arrow_streaming` instead.
    def load_as_stream
      load_as_arrow_streaming
    end

    # Only available when the ORC reader constant exists.
    if Arrow.const_defined?(:ORCFileReader)
      def load_as_orc
        input = open_input_stream
        reader = ORCFileReader.new(input)
        field_indexes = @options[:field_indexes]
        reader.set_field_indexes(field_indexes) if field_indexes
        remember_input(reader.read_stripes, input)
      end
    end

    # Removes `:format` from `options` (the hash is changed in place, so
    # callers pass a copy) and hands the rest to the CSV loader.
    def csv_load(options)
      options.delete(:format)
      source = @input.is_a?(Buffer) ? @input.data.to_s : Pathname.new(@input)
      CSVLoader.load(source, **options)
    end

    def load_as_csv
      csv_load(@options.dup)
    end

    def load_as_tsv
      csv_load(@options.merge(delimiter: "\t"))
    end

    def load_as_feather
      input = open_input_stream
      remember_input(FeatherFileReader.new(input).read, input)
    end

    def load_as_json
      input = open_input_stream
      remember_input(JSONReader.new(input).read, input)
    end
  end
end
# `URI` is referenced in this file, so require it here instead of relying on
# another file having loaded it first.
require "uri"

module Arrow
  # Saves an {Arrow::Table} to a path-like object or an {Arrow::Buffer}.
  #
  # The output kind selects a private `save_to_*` method and the `:format`
  # option selects a private `save_as_*` method, so new outputs and formats
  # can be added just by defining methods with those prefixes.
  class TableSaver
    class << self
      # Shortcut for `new(table, output, options).save`.
      def save(table, output, options={})
        new(table, output, options).save
      end
    end

    # @param table [Arrow::Table] The table to be saved.
    # @param output [#to_path, String, URI, Arrow::Buffer] The save target.
    #   Objects responding to `to_path` are converted to their path.
    # @param options [Hash] Save options. `:format` and `:compression` are
    #   filled from the output's extension when they aren't given.
    def initialize(table, output, options={})
      @table = table
      output = output.to_path if output.respond_to?(:to_path)
      @output = output
      @options = options
      fill_options
    end

    # Dispatches to the `save_to_*` method matching the output kind.
    #
    # @raise [ArgumentError] If no saver exists for the output kind.
    def save
      if @output.is_a?(URI)
        custom_save_method = "save_to_uri"
      else
        custom_save_method = "save_to_file"
      end
      unless respond_to?(custom_save_method, true)
        available_schemes = available_suffixes(/\Asave_to_/)
        message = "Arrow::Table save source must be one of " +
          "[#{available_schemes.join(", ")}]: #{@output.scheme.inspect}"
        raise ArgumentError, message
      end
      __send__(custom_save_method)
    end

    private
    # Collects the part after the prefix for every public or private method
    # whose name matches `prefix_pattern`.
    def available_suffixes(prefix_pattern)
      suffixes = []
      (methods(true) | private_methods(true)).each do |name|
        match_data = prefix_pattern.match(name.to_s)
        suffixes << match_data.post_match if match_data
      end
      suffixes
    end

    # Dispatches to the `save_as_*` method matching `@options[:format]`.
    #
    # @raise [ArgumentError] If the format has no saver. The deprecated
    #   format names aren't advertised in the message.
    def save_to_file
      format = @options[:format]
      custom_save_method = "save_as_#{format}"
      unless respond_to?(custom_save_method, true)
        deprecated_formats = ["batch", "stream"]
        available_formats =
          available_suffixes(/\Asave_as_/) - deprecated_formats
        message = "Arrow::Table save format must be one of " +
          "[#{available_formats.join(", ")}]: #{format.inspect}"
        raise ArgumentError, message
      end
      if method(custom_save_method).arity.zero?
        __send__(custom_save_method)
      else
        # For backward compatibility.
        __send__(custom_save_method, @output)
      end
    end

    # Fills `:format` and `:compression` from the output's extension.
    # Works on a copy so that the caller's options hash is never changed.
    def fill_options
      if @options[:format] and @options.key?(:compression)
        return
      end

      case @output
      when Buffer
        info = {}
      when URI
        extension = PathExtension.new(@output.path)
        info = extension.extract
      else
        extension = PathExtension.new(@output)
        info = extension.extract
      end
      format = info[:format]
      @options = @options.dup
      if format
        @options[:format] ||= format.to_sym
      else
        @options[:format] ||= :arrow
      end
      unless @options.key?(:compression)
        @options[:compression] = info[:compression]
      end
    end

    # Opens the uncompressed output stream for `@output`.
    def open_raw_output_stream(&block)
      if @output.is_a?(Buffer)
        BufferOutputStream.open(@output, &block)
      else
        FileOutputStream.open(@output, false, &block)
      end
    end

    # Opens the output stream, wrapped in a compressed stream when the
    # `:compression` option is set.
    def open_output_stream(&block)
      compression = @options[:compression]
      if compression
        codec = Codec.new(compression)
        open_raw_output_stream do |raw_output|
          CompressedOutputStream.open(codec, raw_output) do |output|
            yield(output)
          end
        end
      else
        open_raw_output_stream(&block)
      end
    end

    def save_raw(writer_class)
      open_output_stream do |output|
        writer_class.open(output, @table.schema) do |writer|
          writer.write_table(@table)
        end
      end
    end

    def save_as_arrow
      save_as_arrow_file
    end

    # @since 1.0.0
    def save_as_arrow_file
      save_raw(RecordBatchFileWriter)
    end

    # @deprecated Use `format: :arrow_file` instead.
    def save_as_batch
      save_as_arrow_file
    end

    # @since 1.0.0
    def save_as_arrow_streaming
      save_raw(RecordBatchStreamWriter)
    end

    # @deprecated Use `format: :arrow_streaming` instead.
    def save_as_stream
      save_as_arrow_streaming
    end

    # Writes the column names as the first CSV row and then every record.
    # `options` are passed to `CSV.new` as is.
    def csv_save(**options)
      open_output_stream do |output|
        csv = CSV.new(output, **options)
        names = @table.schema.fields.collect(&:name)
        csv << names
        @table.raw_records.each do |record|
          csv << record
        end
      end
    end

    def save_as_csv
      csv_save
    end

    def save_as_tsv
      csv_save(col_sep: "\t")
    end

    # Copies every save option that matches a Feather write property name
    # to the properties. The raw output stream is used here, not the
    # compressing one from `open_output_stream`.
    def save_as_feather
      properties = FeatherWriteProperties.new
      properties.class.properties.each do |name|
        value = @options[name.to_sym]
        next if value.nil?
        properties.__send__("#{name}=", value)
      end
      open_raw_output_stream do |output|
        @table.write_as_feather(output, properties)
      end
    end
  end
end
require "time"

module Arrow
  # Renders a table as tab separated text: one header line with the column
  # names and then one line per record, prefixed with its row number.
  class TableTableFormatter < TableFormatter
    private

    # Appends the header line. Every column name is preceded by a tab so
    # that the names line up after the row number column.
    def format_header(text, column_formatters)
      column_formatters.each do |column_formatter|
        text << "\t" << column_formatter.aligned_name
      end
      text << "\n"
    end

    # Appends each row in `rows` to `text`. Row numbers start at
    # `start_offset` and are right aligned in `n_digits` columns. Values
    # are formatted to the width of their column's aligned name.
    def format_rows(text, column_formatters, rows, n_digits, start_offset)
      rows.each.with_index(start_offset) do |row, row_number|
        text << ("%*d" % [n_digits, row_number])
        row.zip(column_formatters) do |column_value, column_formatter|
          text << "\t"
          width = column_formatter.aligned_name.size
          text << column_formatter.format_value(column_value, width)
        end
        text << "\n"
      end
    end

    # Marks the gap between the head rows and the tail rows.
    def format_ellipsis(text)
      text << "...\n"
    end
  end
end
require "arrow/raw-table-converter"

module Arrow
  class Table
    include ColumnContainable
    include GenericFilterable
    include GenericTakeable
    include RecordContainable

    class << self
      # Loads a table with {Arrow::TableLoader}.
      #
      # @param path The load source. It's passed to
      #   `Arrow::TableLoader.load` as is.
      # @param options [Hash] The load options. They're passed to
      #   `Arrow::TableLoader.load` as is.
      def load(path, options={})
        TableLoader.load(path, options)
      end
    end

    # Keep the original constructor reachable for #initialize below.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Table}.
    #
    # @overload initialize(columns)
    #
    #   @param columns [::Array<Arrow::Column>] The columns of the table.
    #
    #   @example Create a table from columns
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     count_column = Arrow::Column.new(count_field, count_array)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     visible_column = Arrow::Column.new(visible_field, visible_array)
    #     Arrow::Table.new([count_column, visible_column])
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, Arrow::Array>]
    #     The pairs of column name and values of the table. Column values
    #     are `Arrow::Array`.
    #
    #   @example Create a table from column name and values
    #     Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
    #                      "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, Arrow::ChunkedArray>]
    #     The pairs of column name and values of the table. Column values
    #     are `Arrow::ChunkedArray`.
    #
    #   @example Create a table from column name and values
    #     count_chunks = [
    #       Arrow::UInt32Array.new([0, 2]),
    #       Arrow::UInt32Array.new([nil, 4]),
    #     ]
    #     visible_chunks = [
    #       Arrow::BooleanArray.new([true]),
    #       Arrow::BooleanArray.new([nil, nil, false]),
    #     ]
    #     Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
    #                      "visible" => Arrow::ChunkedArray.new(visible_chunks))
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, ::Array>]
    #     The pairs of column name and values of the table. Column values
    #     are `Array`.
    #
    #   @example Create a table from column name and values
    #     Arrow::Table.new("count" => [0, 2, nil, 4],
    #                      "visible" => [true, nil, nil, false])
    #
    # @overload initialize(schema, columns)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param columns [::Array<Arrow::Column>] The data of the table.
    #
    #   @example Create a table from schema and columns
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     count_column = Arrow::Column.new(count_field, count_array)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     visible_column = Arrow::Column.new(visible_field, visible_array)
    #     Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
    #                      [count_column, visible_column])
    #
    # @overload initialize(schema, arrays)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param arrays [::Array<Arrow::Array>] The data of the table.
    #
    #   @example Create a table from schema and arrays
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
    #                      [count_array, visible_array])
    #
    # @overload initialize(schema, record_batches)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param record_batches [::Array<Arrow::RecordBatch>] The data of
    #     the table.
    #
    #   @example Create a table from schema and record batches
    #     count_field = Arrow::Field.new("count", :uint32)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     schema = Arrow::Schema.new([count_field, visible_field])
    #     record_batches = [
    #       Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
    #       Arrow::RecordBatch.new(schema, [[4, false]]),
    #     ]
    #     Arrow::Table.new(schema, record_batches)
    #
    # @overload initialize(schema, raw_records)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param raw_records [::Array<::Array>] The data of the table as
    #     primitive Ruby objects.
    #
    #   @example Create a table from schema and raw records
    #     schema = {
    #       count: :uint32,
    #       visible: :boolean,
    #     }
    #     raw_records = [
    #       [0, true],
    #       [2, nil],
    #       [nil, nil],
    #       [4, false],
    #     ]
    #     Arrow::Table.new(schema, raw_records)
    #
    # @raise [ArgumentError] If the number of arguments isn't 1 or 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        raw_table_converter = RawTableConverter.new(args[0])
        schema = raw_table_converter.schema
        values = raw_table_converter.values
      when 2
        schema = args[0]
        schema = Schema.new(schema) unless schema.is_a?(Schema)
        values = args[1]
        # Only the first element is inspected to detect the kind of values.
        case values[0]
        when ::Array
          # Raw records: wrap them in one record batch.
          values = [RecordBatch.new(schema, values)]
        when Column
          values = values.collect(&:data)
        end
      else
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(schema, values)
    end

    # Yields each record batch read from this table by
    # {Arrow::TableBatchReader}.
    #
    # @return [Enumerator] If no block is given.
    def each_record_batch
      return to_enum(__method__) unless block_given?

      reader = TableBatchReader.new(self)
      while record_batch = reader.read_next
        yield(record_batch)
      end
    end

    alias_method :size, :n_rows
    alias_method :length, :n_rows

    # Keep the original (offset, length) slice reachable for #slice below.
    alias_method :slice_raw, :slice

    # @overload slice(offset, length)
    #
    #   @param offset [Integer] The offset of sub Arrow::Table.
    #   @param length [Integer] The length of sub Arrow::Table.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only from
    #     `offset` to `offset + length` range.
    #
    # @overload slice(index)
    #
    #   @param index [Integer] The index in this table. A negative index
    #     counts from the end of the table.
    #   @return [Arrow::Record, nil]
    #     The `Arrow::Record` corresponding to index of
    #     the table. `nil` if the index is out of range.
    #
    # @overload slice(booleans)
    #
    #   @param booleans [::Array<Boolean>]
    #     The values indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of indices
    #     the values of `booleans` is true.
    #
    # @overload slice(boolean_array)
    #
    #   @param boolean_array [Arrow::BooleanArray]
    #     The values indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of indices
    #     the values of `boolean_array` is true.
    #
    # @overload slice(range)
    #
    #   @param range [Range] The range indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of the range of indices.
    #
    # @overload slice(conditions)
    #
    #   @param conditions [Hash] The conditions to select records.
    #     A `Range` value is converted to comparisons with its begin
    #     and/or end. Other values are compared by equality. All
    #     conditions are combined with `&`.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows matched by condition
    #
    # @overload slice
    #
    #   @yield [slicer] Gives slicer that constructs condition to select records.
    #   @yieldparam slicer [Arrow::Slicer] The slicer that helps us to
    #     build condition.
    #   @yieldreturn [Arrow::Slicer::Condition, ::Array<Arrow::Slicer::Condition>]
    #     The condition to select records.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows matched by condition
    #     specified by slicer.
    #
    # @raise [ArgumentError] If both arguments and a block are given, if
    #   the number of arguments isn't 1 or 2, if the start of a range is
    #   out of range, or if a slicer has an unsupported type.
    def slice(*args)
      # First normalize every calling style to a list of slicers.
      slicers = []
      if block_given?
        unless args.empty?
          raise ArgumentError, "must not specify both arguments and block"
        end
        block_slicer = yield(Slicer.new(self))
        case block_slicer
        when ::Array
          slicers.concat(block_slicer)
        else
          slicers << block_slicer
        end
      else
        expected_n_args = nil
        case args.size
        when 1
          case args[0]
          when Integer
            # A single index returns a record, not a table.
            index = args[0]
            index += n_rows if index < 0
            return nil if index < 0
            return nil if index >= n_rows
            return Record.new(self, index)
          when Hash
            condition_pairs = args[0]
            slicer = Slicer.new(self)
            conditions = []
            condition_pairs.each do |key, value|
              case value
              when Range
                # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
                # https://issues.apache.org/jira/browse/ARROW-9843
                unless value.begin.nil?
                  conditions << (slicer[key] >= value.begin)
                end
                unless value.end.nil?
                  if value.exclude_end?
                    conditions << (slicer[key] < value.end)
                  else
                    conditions << (slicer[key] <= value.end)
                  end
                end
              else
                conditions << (slicer[key] == value)
              end
            end
            slicers << conditions.inject(:&)
          else
            slicers << args[0]
          end
        when 2
          offset, length = args
          slicers << (offset...(offset + length))
        else
          expected_n_args = "1..2"
        end
        if expected_n_args
          message = "wrong number of arguments " +
            "(given #{args.size}, expected #{expected_n_args})"
          raise ArgumentError, message
        end
      end

      # Then apply each slicer and concatenate the results.
      filter_options = Arrow::FilterOptions.new
      filter_options.null_selection_behavior = :emit_null
      sliced_tables = []
      slicers.each do |slicer|
        slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
        case slicer
        when Integer
          # An Integer slicer selects from that row to the last row.
          slicer += n_rows if slicer < 0
          sliced_tables << slice_by_range(slicer, n_rows - 1)
        when Range
          original_from = from = slicer.first
          to = slicer.last
          to -= 1 if slicer.exclude_end?
          from += n_rows if from < 0
          if from < 0 or from >= n_rows
            message =
              "offset is out of range (-#{n_rows + 1},#{n_rows}): " +
              "#{original_from}"
            raise ArgumentError, message
          end
          to += n_rows if to < 0
          sliced_tables << slice_by_range(from, to)
        when ::Array, BooleanArray, ChunkedArray
          sliced_tables << filter(slicer, filter_options)
        else
          message = "slicer must be Integer, Range, (from, to), " +
            "Arrow::ChunkedArray of Arrow::BooleanArray, " +
            "Arrow::BooleanArray or Arrow::Slicer::Condition: #{slicer.inspect}"
          raise ArgumentError, message
        end
      end
      if sliced_tables.size > 1
        sliced_tables[0].concatenate(sliced_tables[1..-1])
      else
        sliced_tables[0]
      end
    end

    # Creates a new table by adding, replacing and removing columns.
    #
    # Columns that keep their name stay at their position. Columns with a
    # new name are appended after the existing columns.
    #
    # @param other [Hash, Arrow::Table]
    #   For `Hash`, each pair is a column name and its new data. A `nil`
    #   or `false` value removes the column of that name. For
    #   `Arrow::Table`, all of its columns are added or replace the
    #   columns of the same name.
    # @raise [ArgumentError] If `other` is neither `Hash` nor
    #   `Arrow::Table`, or if column data has an unsupported type.
    # @return [Arrow::Table]
    def merge(other)
      added_columns = {}
      removed_columns = {}

      case other
      when Hash
        other.each do |name, value|
          name = name.to_s
          if value
            added_columns[name] = ensure_raw_column(name, value)
          else
            removed_columns[name] = true
          end
        end
      when Table
        added_columns = {}
        other.columns.each do |column|
          name = column.name
          added_columns[name] = ensure_raw_column(name, column)
        end
      else
        message = "merge target must be Hash or Arrow::Table: " +
          "<#{other.inspect}>: #{inspect}"
        raise ArgumentError, message
      end

      new_columns = []
      columns.each do |column|
        column_name = column.name
        # Deleting here leaves only brand-new columns in added_columns.
        new_column = added_columns.delete(column_name)
        if new_column
          new_columns << new_column
          next
        end
        next if removed_columns.key?(column_name)
        new_columns << ensure_raw_column(column_name, column)
      end
      added_columns.each do |name, new_column|
        new_columns << new_column
      end
      new_fields = []
      new_arrays = []
      new_columns.each do |new_column|
        new_fields << new_column[:field]
        new_arrays << new_column[:data]
      end
      self.class.new(new_fields, new_arrays)
    end

    alias_method :remove_column_raw, :remove_column
    # Removes a column by name or by index.
    #
    # @param name_or_index [String, Symbol, Integer] The column name or
    #   the column index. A negative index counts from the last column.
    # @raise [KeyError] If no column has the given name.
    # @raise [IndexError] If the index is out of range.
    # @return The result of the original `remove_column` for the
    #   resolved index.
    def remove_column(name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index.to_s
        index = columns.index {|column| column.name == name}
        if index.nil?
          message = "unknown column: #{name_or_index.inspect}: #{inspect}"
          raise KeyError.new(message)
        end
      else
        index = name_or_index
        index += n_columns if index < 0
        if index < 0 or index >= n_columns
          message = "out of index (0..#{n_columns - 1}): " +
            "#{name_or_index.inspect}: #{inspect}"
          raise IndexError.new(message)
        end
      end
      remove_column_raw(index)
    end

    # Experimental
    def group(*keys)
      Group.new(self, keys)
    end

    # Experimental
    def window(size: nil)
      RollingWindow.new(self, size)
    end

    # Saves this table with {Arrow::TableSaver}.
    #
    # @param output The save target. It's passed to `Arrow::TableSaver`
    #   as is.
    # @param options [Hash] The save options. They're passed to
    #   `Arrow::TableSaver` as is.
    def save(output, options={})
      saver = TableSaver.new(self, output, options)
      saver.save
    end

    # Creates a new table of the same schema from the result of calling
    # `pack` on the data of every column.
    def pack
      packed_arrays = columns.collect do |column|
        column.data.pack
      end
      self.class.new(schema, packed_arrays)
    end

    alias_method :to_s_raw, :to_s
    # @param options [Hash] The format options. They're also passed to
    #   the formatter.
    # @option options [:column, :list, :table, nil] :format
    #   `:column` uses the original `to_s`, `:list` uses
    #   {Arrow::TableListFormatter} and `:table` or `nil` uses
    #   {Arrow::TableTableFormatter}.
    # @raise [ArgumentError] If `:format` is none of them.
    # @return [String]
    def to_s(options={})
      format = options[:format]
      case format
      when :column
        return to_s_raw
      when :list
        formatter_class = TableListFormatter
      when :table, nil
        formatter_class = TableTableFormatter
      else
        message = ":format must be :column, :list, :table or nil"
        raise ArgumentError, "#{message}: <#{format.inspect}>"
      end
      formatter = formatter_class.new(self, options)
      formatter.format
    end

    alias_method :inspect_raw, :inspect
    # @return [String] The original `inspect` followed by #to_s on the
    #   next line.
    def inspect
      "#{super}\n#{to_s}"
    end

    # Column names are usable as method names. See #method_missing.
    def respond_to_missing?(name, include_private)
      return true if find_column(name)
      super
    end

    # Returns the column named `name` when it's called without arguments
    # and such a column is found.
    def method_missing(name, *args, &block)
      if args.empty?
        column = find_column(name)
        return column if column
      end
      super
    end

    private
    # Slices rows from `from` to `to`, both inclusive.
    def slice_by_range(from, to)
      slice_raw(from, to - from + 1)
    end

    # Normalizes column data to a `{field:, data:}` pair.
    #
    # `Array` here is `Arrow::Array`, not `::Array`, because the constant
    # is looked up inside the `Arrow` module.
    #
    # @raise [ArgumentError] If `data` has an unsupported type.
    def ensure_raw_column(name, data)
      case data
      when Array
        {
          field: Field.new(name, data.value_data_type),
          data: ChunkedArray.new([data]),
        }
      when ChunkedArray
        {
          field: Field.new(name, data.value_data_type),
          data: data,
        }
      when Column
        column = data
        data = column.data
        data = ChunkedArray.new([data]) unless data.is_a?(ChunkedArray)
        {
          field: column.field,
          data: data,
        }
      else
        message = "column must be Arrow::Array or Arrow::Column: " +
          "<#{name}>: <#{data.inspect}>: #{inspect}"
        raise ArgumentError, message
      end
    end
  end
end
module Arrow
  class Tensor
    # A tensor is already an Arrow object, so it converts to itself.
    def to_arrow
      self
    end
  end
end

module Arrow
  # A time of day (or a signed duration) that is stored as a count of
  # `unit`s. The hour/minute/second/nanosecond parts are computed from the
  # absolute value on first use and cached.
  class Time
    attr_reader :unit
    attr_reader :value

    # @param unit [Arrow::TimeUnit] The unit of `value`.
    # @param value [Numeric] The number of `unit`s. It may be negative.
    def initialize(unit, value)
      @unit = unit
      @value = value
      @unconstructed = false
    end

    # Two times are equal when their sign and all of their parts match,
    # whatever their units are.
    def ==(other)
      return false unless other.is_a?(self.class)
      return false unless positive? == other.positive?
      return false unless hour == other.hour
      return false unless minute == other.minute
      return false unless second == other.second

      nano_second == other.nano_second
    end

    # Creates a new time that represents this time in `target_unit`.
    # Sub-unit precision is dropped by integer division.
    def cast(target_unit)
      return self.class.new(@unit, @value) if @unit == target_unit

      in_second = (hour * 60 * 60) + (minute * 60) + second
      target_value =
        case target_unit
        when TimeUnit::MILLI
          (in_second * 1000) + (nano_second / 1000 / 1000)
        when TimeUnit::MICRO
          (in_second * 1000 * 1000) + (nano_second / 1000)
        when TimeUnit::NANO
          (in_second * 1000 * 1000 * 1000) + nano_second
        else
          in_second
        end
      # The parts are computed from the absolute value: restore the sign.
      target_value = -target_value if negative?
      self.class.new(target_unit, target_value)
    end

    # @return [Float, nil] The value in seconds. `nil` for an unknown unit.
    def to_f
      in_float = @value.to_f
      case @unit
      when TimeUnit::SECOND
        in_float
      when TimeUnit::MILLI
        in_float / 1000.0
      when TimeUnit::MICRO
        in_float / 1000.0 / 1000.0
      when TimeUnit::NANO
        in_float / 1000.0 / 1000.0 / 1000.0
      end
    end

    def positive?
      @value.positive?
    end

    def negative?
      @value.negative?
    end

    def hour
      unconstruct
      @hour
    end

    def minute
      unconstruct
      @minute
    end
    alias_method :min, :minute

    def second
      unconstruct
      @second
    end
    alias_method :sec, :second

    def nano_second
      unconstruct
      @nano_second
    end
    alias_method :nsec, :nano_second

    # @return [String] "[-]HH:MM:SS" followed by the fractional seconds
    #   without trailing zeros when they aren't zero.
    def to_s
      unconstruct
      sign = @value.negative? ? "-" : ""
      fraction = ""
      unless @nano_second.zero?
        fraction = (".%09d" % @nano_second).sub(/0+\z/, "")
      end
      "%s%02d:%02d:%02d%s" % [sign, @hour, @minute, @second, fraction]
    end

    private
    # Splits the absolute value into parts once and caches them.
    def unconstruct
      return if @unconstructed

      abs_value = @value.abs
      abs_value_in_second, nano_second =
        case unit
        when TimeUnit::SECOND
          [abs_value, 0]
        when TimeUnit::MILLI
          [abs_value / 1000, (abs_value % 1000) * 1000 * 1000]
        when TimeUnit::MICRO
          [abs_value / 1000 / 1000, (abs_value % (1000 * 1000)) * 1000]
        when TimeUnit::NANO
          [abs_value / 1000 / 1000 / 1000, abs_value % (1000 * 1000 * 1000)]
        else
          raise ArgumentError, "invalid unit: #{@unit.inspect}"
        end
      unconstruct_second(abs_value_in_second)
      @nano_second = nano_second
      @unconstructed = true
    end

    # Splits non-negative seconds into hour, minute and second parts.
    def unconstruct_second(abs_value_in_second)
      less_than_minute = abs_value_in_second < 60
      if abs_value_in_second < (60 * 60)
        @hour = 0
        @minute = less_than_minute ? 0 : abs_value_in_second / 60
      else
        in_minute = abs_value_in_second / 60
        @hour = in_minute / 60
        @minute = in_minute % 60
      end
      @second = less_than_minute ? abs_value_in_second : abs_value_in_second % 60
    end
  end
end
module Arrow
  # Ruby-level conveniences added to the time32 array builder.
  class Time32ArrayBuilder
    # Creates a builder for +unit_or_data_type+ and builds an array from
    # +values+ in one call.
    #
    # @param unit_or_data_type [Arrow::DataType, Object] Same as
    #   {#initialize}.
    # @param values [Object] Handed to the instance-level `build`
    #   (defined outside this file) unchanged.
    # @return [Object] Whatever the instance-level `build` returns.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    alias_method :initialize_raw, :initialize

    # @param unit_or_data_type [Arrow::DataType, Object] A data type is
    #   used as-is. Anything else is treated as a unit and wrapped in a
    #   new {Arrow::Time32DataType}.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          Time32DataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    # @return [Object] The unit of the value data type. Looked up once
    #   and then cached in `@unit`.
    def unit
      return @unit if @unit
      @unit = value_data_type.unit
    end

    private

    # Values that are a `Time` (the constant is resolved from inside
    # `module Arrow`, presumably Arrow::Time -- confirm) are cast to
    # this builder's unit and unwrapped. Everything else is returned
    # untouched.
    def convert_to_arrow_value(value)
      if value.is_a?(Time)
        value.cast(unit).value
      else
        value
      end
    end
  end
end
module Arrow
  class Time32Array
    # @param i [Integer] Index handed to `get_raw_value`.
    # @return [Object] A new `Time` (the constant is resolved from inside
    #   `module Arrow`, presumably Arrow::Time -- confirm) built from this
    #   array's unit and the raw value at +i+.
    def get_value(i)
      time_unit = unit
      Time.new(time_unit, get_raw_value(i))
    end

    # @return [Object] The unit of the value data type. Looked up once
    #   and then cached in `@unit`.
    def unit
      return @unit if @unit
      @unit = value_data_type.unit
    end
  end
end

# Originally a separate file: lib/arrow/time32-data-type.rb
module Arrow
  class Time32DataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Time32DataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     time32 data type.
    #
    #     The unit must be second or millisecond.
    #
    #   @example Create a time32 data type with Arrow::TimeUnit
    #     Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
    #
    #   @example Create a time32 data type with Symbol
    #     Arrow::Time32DataType.new(:milli)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the time32 data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the time32 data type.
    #
    #     The unit must be second or millisecond.
    #
    #   @example Create a time32 data type with Arrow::TimeUnit
    #     Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
    #
    #   @example Create a time32 data type with Symbol
    #     Arrow::Time32DataType.new(unit: :milli)
    def initialize(unit)
      # A Hash is the "description" form: only its :unit entry is used.
      unit = unit[:unit] if unit.is_a?(Hash)
      initialize_raw(unit)
    end
  end
end
module Arrow
  # Ruby-level conveniences added to the time64 array builder.
  class Time64ArrayBuilder
    # Creates a builder for +unit_or_data_type+ and builds an array from
    # +values+ in one call.
    #
    # @param unit_or_data_type [Arrow::DataType, Object] Same as
    #   {#initialize}.
    # @param values [Object] Handed to the instance-level `build`
    #   (defined outside this file) unchanged.
    # @return [Object] Whatever the instance-level `build` returns.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    alias_method :initialize_raw, :initialize

    # @param unit_or_data_type [Arrow::DataType, Object] A data type is
    #   used as-is. Anything else is treated as a unit and wrapped in a
    #   new {Arrow::Time64DataType}.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          Time64DataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    # @return [Object] The unit of the value data type. Looked up once
    #   and then cached in `@unit`.
    def unit
      return @unit if @unit
      @unit = value_data_type.unit
    end

    private

    # Values that are a `Time` (the constant is resolved from inside
    # `module Arrow`, presumably Arrow::Time -- confirm) are cast to
    # this builder's unit and unwrapped. Everything else is returned
    # untouched.
    def convert_to_arrow_value(value)
      if value.is_a?(Time)
        value.cast(unit).value
      else
        value
      end
    end
  end
end
module Arrow
  class Time64Array
    # @param i [Integer] Index handed to `get_raw_value`.
    # @return [Object] A new `Time` (the constant is resolved from inside
    #   `module Arrow`, presumably Arrow::Time -- confirm) built from this
    #   array's unit and the raw value at +i+.
    def get_value(i)
      time_unit = unit
      Time.new(time_unit, get_raw_value(i))
    end

    # @return [Object] The unit of the value data type. Looked up once
    #   and then cached in `@unit`.
    def unit
      return @unit if @unit
      @unit = value_data_type.unit
    end
  end
end

# Originally a separate file: lib/arrow/time64-data-type.rb
module Arrow
  class Time64DataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Time64DataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     time64 data type.
    #
    #     The unit must be microsecond or nanosecond.
    #
    #   @example Create a time64 data type with Arrow::TimeUnit
    #     Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
    #
    #   @example Create a time64 data type with Symbol
    #     Arrow::Time64DataType.new(:nano)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the time64 data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the time64 data type.
    #
    #     The unit must be microsecond or nanosecond.
    #
    #   @example Create a time64 data type with Arrow::TimeUnit
    #     Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
    #
    #   @example Create a time64 data type with Symbol
    #     Arrow::Time64DataType.new(unit: :nano)
    def initialize(unit)
      # A Hash is the "description" form: only its :unit entry is used.
      unit = unit[:unit] if unit.is_a?(Hash)
      initialize_raw(unit)
    end
  end
end
module Arrow
  # Ruby-level conveniences added to the timestamp array builder.
  class TimestampArrayBuilder
    # Creates a builder for +unit_or_data_type+ and builds an array from
    # +values+ in one call.
    #
    # @param unit_or_data_type [Arrow::DataType, Object] Same as
    #   {#initialize}.
    # @param values [Object] Handed to the instance-level `build`
    #   (defined outside this file) unchanged.
    # @return [Object] Whatever the instance-level `build` returns.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    alias_method :initialize_raw, :initialize

    # @param unit_or_data_type [Arrow::DataType, Object] A data type is
    #   used as-is. Anything else is treated as a unit and wrapped in a
    #   new {Arrow::TimestampDataType}.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          TimestampDataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    private

    # @return [Symbol] The unit's `nick` as a Symbol, cached in `@unit_id`.
    def unit_id
      return @unit_id if @unit_id
      @unit_id = value_data_type.unit.nick.to_sym
    end

    # Turns a time-like value into the integer count of this builder's unit
    # since the epoch. Values that are not (and cannot be turned into) a
    # ::Time are returned untouched.
    def convert_to_arrow_value(value)
      # `Time` here is resolved from inside `module Arrow` (presumably
      # Arrow::Time -- confirm); such values are never sent `to_time`.
      value = value.to_time if value.respond_to?(:to_time) && !value.is_a?(Time)
      return value unless value.is_a?(::Time)

      seconds = value.to_i
      case unit_id
      when :second
        seconds
      when :milli
        seconds * 1_000 + value.usec / 1_000
      when :micro
        seconds * 1_000_000 + value.usec
      else
        # Every other unit is treated as nanoseconds.
        seconds * 1_000_000_000 + value.nsec
      end
    end
  end
end
module Arrow
  class TimestampArray
    # @param i [Integer] Index handed to `get_raw_value`.
    # @return [::Time] The raw value at +i+ converted with this array's unit.
    def get_value(i)
      cast_to_time(get_raw_value(i))
    end

    # @return [Object] The unit of the value data type. Looked up once
    #   and then cached in `@unit`.
    def unit
      @unit ||= value_data_type.unit
    end

    private

    # Converts an integer count of `unit` since the epoch into a ::Time.
    #
    # The second argument of `::Time.at` is interpreted as microseconds,
    # so sub-second remainders are always rescaled to microseconds first.
    #
    # @param raw_value [Integer] Count of `unit` since the epoch.
    # @return [::Time]
    def cast_to_time(raw_value)
      case unit
      when TimeUnit::SECOND
        ::Time.at(raw_value)
      when TimeUnit::MILLI
        seconds, milliseconds = raw_value.divmod(1_000)
        # Passing the millisecond remainder straight through would make
        # 1500 ms come out as 1.0005 s instead of 1.5 s.
        ::Time.at(seconds, milliseconds * 1_000)
      when TimeUnit::MICRO
        ::Time.at(*raw_value.divmod(1_000_000))
      else
        # Every other unit is treated as nanoseconds. Integer/Rational
        # arithmetic keeps all nine sub-second digits; a Float division
        # cannot at present-day epoch magnitudes.
        seconds, nanoseconds = raw_value.divmod(1_000_000_000)
        ::Time.at(seconds, Rational(nanoseconds, 1_000))
      end
    end
  end
end

# Originally a separate file: lib/arrow/timestamp-data-type.rb
module Arrow
  class TimestampDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::TimestampDataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     timestamp data type.
    #
    #   @example Create a timestamp data type with Arrow::TimeUnit
    #     Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
    #
    #   @example Create a timestamp data type with Symbol
    #     Arrow::TimestampDataType.new(:milli)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the timestamp data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the timestamp data type.
    #
    #   @example Create a timestamp data type with Arrow::TimeUnit
    #     Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
    #
    #   @example Create a timestamp data type with Symbol
    #     Arrow::TimestampDataType.new(unit: :milli)
    def initialize(unit)
      # A Hash is the "description" form: only its :unit entry is used.
      if unit.is_a?(Hash)
        description = unit
        unit = description[:unit]
      end
      initialize_raw(unit)
    end
  end
end
module Arrow
  # Full version string; an optional tag follows a "-".
  VERSION = "6.0.1"

  # VERSION broken into its components.
  module Version
    # `release` is the dotted numeric part; TAG is whatever follows the
    # first "-" in VERSION, or nil when there is none.
    release, TAG = VERSION.split("-")
    MAJOR, MINOR, MICRO = release.split(".").map(&:to_i)
    STRING = VERSION
  end
end

# Originally a separate file: lib/arrow/writable.rb
module Arrow
  module Writable
    # `<<` is another name for `write` (which is defined outside this file).
    alias_method :<<, :write
  end
end
require_relative "lib/arrow/version"

Gem::Specification.new do |spec|
  spec.name = "red-arrow"

  # MAJOR.MINOR.MICRO, followed by the tag when the version has one.
  numeric_components = [
    Arrow::Version::MAJOR,
    Arrow::Version::MINOR,
    Arrow::Version::MICRO,
  ].map(&:to_s)
  version_components = numeric_components + [Arrow::Version::TAG]
  spec.version = version_components.compact.join(".")

  spec.homepage = "https://arrow.apache.org/"
  spec.authors = ["Apache Arrow Developers"]
  spec.email = ["dev@arrow.apache.org"]

  spec.summary = "Red Arrow is the Ruby bindings of Apache Arrow"
  spec.description =
    "Apache Arrow is a common in-memory columnar data store. " +
    "It's useful to share and process large data."
  spec.license = "Apache-2.0"

  # Packaged files: fixed top-level files first, then one glob at a time.
  spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
  spec.files += ["LICENSE.txt", "NOTICE.txt"]
  %w[
    ext/**/*.{cpp,hpp,rb}
    lib/**/*.rb
    image/*.*
    doc/text/*
  ].each do |pattern|
    spec.files += Dir.glob(pattern)
  end
  spec.test_files += Dir.glob("test/**/*")
  spec.extensions = ["ext/arrow/extconf.rb"]

  # Each entry is a gem name optionally followed by version requirements.
  [
    ["bigdecimal", ">= 2.0.3"],
    ["extpp", ">= 0.0.7"],
    ["gio2", ">= 3.4.9"],
    ["native-package-installer"],
    ["pkg-config"],
  ].each do |name, *requirements|
    spec.add_runtime_dependency(name, *requirements)
  end

  [
    ["benchmark-driver"],
    ["bundler"],
    ["faker"],
    ["fiddle", ">= 1.0.9"],
    ["rake"],
    ["redcarpet"],
    ["test-unit"],
    ["yard"],
  ].each do |name, *requirements|
    spec.add_development_dependency(name, *requirements)
  end

  # The MSYS2 package requirement uses only MAJOR.MINOR.MICRO (no tag).
  required_msys2_package_version = version_components.first(3).join(".")
  spec.metadata["msys2_mingw_dependencies"] =
    "arrow>=#{required_msys2_package_version}"
end
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +score +2.9 +10 +-1.1 diff --git a/src/arrow/ruby/red-arrow/test/fixture/integer-float.csv b/src/arrow/ruby/red-arrow/test/fixture/integer-float.csv new file mode 100644 index 000000000..da7614199 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/integer-float.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+score +10 +2.9 +-1.1 diff --git a/src/arrow/ruby/red-arrow/test/fixture/null-with-double-quote.csv b/src/arrow/ruby/red-arrow/test/fixture/null-with-double-quote.csv new file mode 100644 index 000000000..d84545928 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/null-with-double-quote.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name,score +alice,10 +bob,"" +chris,-1 diff --git a/src/arrow/ruby/red-arrow/test/fixture/null-without-double-quote.csv b/src/arrow/ruby/red-arrow/test/fixture/null-without-double-quote.csv new file mode 100644 index 000000000..c91c8880a --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/null-without-double-quote.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name,score +alice,10 +bob, +chris,-1 diff --git a/src/arrow/ruby/red-arrow/test/fixture/with-header-float.csv b/src/arrow/ruby/red-arrow/test/fixture/with-header-float.csv new file mode 100644 index 000000000..f62fc00b6 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/with-header-float.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name,score +alice,10.1 +bob,29.2 +chris,-1.3 diff --git a/src/arrow/ruby/red-arrow/test/fixture/with-header.csv b/src/arrow/ruby/red-arrow/test/fixture/with-header.csv new file mode 100644 index 000000000..a93fc5aec --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/with-header.csv @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name,score +alice,10 +bob,29 +chris,-1 diff --git a/src/arrow/ruby/red-arrow/test/fixture/without-header-float.csv b/src/arrow/ruby/red-arrow/test/fixture/without-header-float.csv new file mode 100644 index 000000000..584a20996 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/without-header-float.csv @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+alice,10.1 +bob,29.2 +chris,-1.3 diff --git a/src/arrow/ruby/red-arrow/test/fixture/without-header.csv b/src/arrow/ruby/red-arrow/test/fixture/without-header.csv new file mode 100644 index 000000000..1f775eae4 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/fixture/without-header.csv @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +alice,10 +bob,29 +chris,-1 diff --git a/src/arrow/ruby/red-arrow/test/helper.rb b/src/arrow/ruby/red-arrow/test/helper.rb new file mode 100644 index 000000000..29e5f9cbc --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/helper.rb @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "arrow" + +require "fiddle" +require "pathname" +require "tempfile" +require "zlib" + +require "test-unit" + +require_relative "helper/fixture" +require_relative "helper/omittable" diff --git a/src/arrow/ruby/red-arrow/test/helper/fixture.rb b/src/arrow/ruby/red-arrow/test/helper/fixture.rb new file mode 100644 index 000000000..24445a7e4 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/helper/fixture.rb @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
module Helper
  # Helpers for locating test fixture files.
  module Fixture
    # @return [Pathname] Absolute path of the `fixture` directory that sits
    #   next to the directory containing this file.
    def fixture_dir
      Pathname.new(File.expand_path(File.join("..", "fixture"), __dir__))
    end

    # @param components [Array<String, Pathname>] Path components below
    #   the fixture directory.
    # @return [Pathname] {#fixture_dir} joined with +components+.
    def fixture_path(*components)
      base = fixture_dir
      base.join(*components)
    end
  end
end
+ +module Helper + module Omittable + def require_gi_bindings(major, minor, micro) + return if GLib.check_binding_version?(major, minor, micro) + message = + "Require gobject-introspection #{major}.#{minor}.#{micro} or later: " + + GLib::BINDING_VERSION.join(".") + omit(message) + end + + def require_gi(major, minor, micro) + return if GObjectIntrospection::Version.or_later?(major, minor, micro) + message = + "Require GObject Introspection #{major}.#{minor}.#{micro} or later: " + + GObjectIntrospection::Version::STRING + omit(message) + end + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb new file mode 100644 index 000000000..c80020666 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb @@ -0,0 +1,365 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module RawRecordsBasicArraysTests + def test_null + records = [ + [nil], + [nil], + [nil], + [nil], + ] + target = build({column: :null}, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [true], + [nil], + [false], + ] + target = build({column: :boolean}, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [-(2 ** 7)], + [nil], + [(2 ** 7) - 1], + ] + target = build({column: :int8}, records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [0], + [nil], + [(2 ** 8) - 1], + ] + target = build({column: :uint8}, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [-(2 ** 15)], + [nil], + [(2 ** 15) - 1], + ] + target = build({column: :int16}, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [0], + [nil], + [(2 ** 16) - 1], + ] + target = build({column: :uint16}, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [-(2 ** 31)], + [nil], + [(2 ** 31) - 1], + ] + target = build({column: :int32}, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [0], + [nil], + [(2 ** 32) - 1], + ] + target = build({column: :uint32}, records) + assert_equal(records, target.raw_records) + end + + def test_int64 + records = [ + [-(2 ** 63)], + [nil], + [(2 ** 63) - 1], + ] + target = build({column: :int64}, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [0], + [nil], + [(2 ** 64) - 1], + ] + target = build({column: :uint64}, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [-1.0], + [nil], + [1.0], + ] + target = build({column: :float}, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [-1.0], + [nil], + [1.0], + ] + target = build({column: :double}, records) 
+ assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + ["\x00".b], + [nil], + ["\xff".b], + ] + target = build({column: :binary}, records) + assert_equal(records, target.raw_records) + end + + def test_tring + records = [ + ["Ruby"], + [nil], + ["\u3042"], # U+3042 HIRAGANA LETTER A + ] + target = build({column: :string}, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [Date.new(1960, 1, 1)], + [nil], + [Date.new(2017, 8, 23)], + ] + target = build({column: :date32}, records) + assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [DateTime.new(1960, 1, 1, 2, 9, 30)], + [nil], + [DateTime.new(2017, 8, 23, 14, 57, 2)], + ] + target = build({column: :date64}, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [Time.parse("1960-01-01T02:09:30Z")], + [nil], + [Time.parse("2017-08-23T14:57:02Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :second, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [Time.parse("1960-01-01T02:09:30.123Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :milli, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_micro + records = [ + [Time.parse("1960-01-01T02:09:30.123456Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :micro, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [Time.parse("1960-01-01T02:09:30.123456789Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654321Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :nano, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + 
unit = Arrow::TimeUnit::SECOND + records = [ + [Arrow::Time.new(unit, 60 * 10)], # 00:10:00 + [nil], + [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 + ] + target = build({ + column: { + type: :time32, + unit: :second, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123 + [nil], + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 + ] + target = build({ + column: { + type: :time32, + unit: :milli, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)], + [nil], + # 02:00:09.987654 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], + ] + target = build({ + column: { + type: :time64, + unit: :micro, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)], + [nil], + # 02:00:09.987654321 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], + ] + target = build({ + column: { + type: :time64, + unit: :nano, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + target = build({ + column: { + type: :decimal128, + precision: 8, + scale: 2, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + target = build({ + column: { + type: :decimal256, + precision: 38, + scale: 2, + } + }, + records) + assert_equal(records, target.raw_records) + end +end + +class 
RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase + include RawRecordsBasicArraysTests + + def build(schema, records) + Arrow::RecordBatch.new(schema, records) + end +end + +class RawRecordsTableBasicArraysTest < Test::Unit::TestCase + include RawRecordsBasicArraysTests + + def build(schema, records) + Arrow::Table.new(schema, records) + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb new file mode 100644 index 000000000..8d94a77fe --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb @@ -0,0 +1,494 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module RawRecordsDenseUnionArrayTests + def build_schema(type, type_codes) + field_description = {} + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :dense_union, + fields: [ + field_description.merge(name: "0"), + field_description.merge(name: "1"), + ], + type_codes: type_codes, + }, + } + end + + # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records) + def build_record_batch(type, records) + type_codes = [0, 1] + schema = Arrow::Schema.new(build_schema(type, type_codes)) + type_ids = [] + offsets = [] + arrays = schema.fields[0].data_type.fields.collect do |field| + sub_schema = Arrow::Schema.new([field]) + sub_records = [] + records.each do |record| + column = record[0] + next if column.nil? + next unless column.key?(field.name) + sub_records << [column[field.name]] + end + sub_record_batch = Arrow::RecordBatch.new(sub_schema, + sub_records) + sub_record_batch.columns[0].data + end + records.each do |record| + column = record[0] + if column.key?("0") + type_id = type_codes[0] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + elsif column.key?("1") + type_id = type_codes[1] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + end + end + union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), + Arrow::Int32Array.new(offsets), + arrays) + schema = Arrow::Schema.new(column: union_array.value_data_type) + Arrow::RecordBatch.new(schema, + records.size, + [union_array]) + end + + def test_null + records = [ + [{"0" => nil}], + ] + target = build(:null, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [{"0" => true}], + [{"1" => nil}], + ] + target = build(:boolean, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [{"0" => -(2 ** 7)}], + [{"1" => nil}], + ] + target = build(:int8, 
records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [{"0" => (2 ** 8) - 1}], + [{"1" => nil}], + ] + target = build(:uint8, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [{"0" => -(2 ** 15)}], + [{"1" => nil}], + ] + target = build(:int16, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [{"0" => (2 ** 16) - 1}], + [{"1" => nil}], + ] + target = build(:uint16, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [{"0" => -(2 ** 31)}], + [{"1" => nil}], + ] + target = build(:int32, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [{"0" => (2 ** 32) - 1}], + [{"1" => nil}], + ] + target = build(:uint32, records) + assert_equal(records, target.raw_records) + end + + def test_int64 + records = [ + [{"0" => -(2 ** 63)}], + [{"1" => nil}], + ] + target = build(:int64, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [{"0" => (2 ** 64) - 1}], + [{"1" => nil}], + ] + target = build(:uint64, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + target = build(:float, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + target = build(:double, records) + assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + [{"0" => "\xff".b}], + [{"1" => nil}], + ] + target = build(:binary, records) + assert_equal(records, target.raw_records) + end + + def test_string + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + ] + target = build(:string, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [{"0" => Date.new(1960, 1, 1)}], + [{"1" => nil}], + ] + target = build(:date32, records) + 
assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], + [{"1" => nil}], + ] + target = build(:date64, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_micro + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"0" => Arrow::Time.new(unit, 60 * 10)}], + [{"1" => nil}], + ] + target = build({ + type: :time32, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], + [{"1" => nil}], + ] + target = build({ + type: :time32, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], + [{"1" => nil}], + ] + target = build({ + type: :time64, + unit: :micro, + 
}, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], + [{"1" => nil}], + ] + target = build({ + type: :time64, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_list + records = [ + [{"0" => [true, nil, false]}], + [{"1" => nil}], + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_struct + records = [ + [{"0" => {"sub_field" => true}}], + [{"1" => nil}], + [{"0" => {"sub_field" => nil}}], + ] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_map + records = [ + [{"0" => {"key1" => true, "key2" => nil}}], + [{"1" => nil}], + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_sparse_union + omit("Need to add support for SparseUnionArrayBuilder") + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => nil}}], + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, 
target.raw_records) + end + + def test_dense_union + omit("Need to add support for DenseUnionArrayBuilder") + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => nil}}], + ] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + [{"0" => "GLib"}], + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + records) + assert_equal(records, target.raw_records) + end +end + +class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase + include RawRecordsDenseUnionArrayTests + + def build(type, records) + build_record_batch(type, records) + end +end + +class RawRecordsTableDenseUnionArrayTest < Test::Unit::TestCase + include RawRecordsDenseUnionArrayTests + + def build(type, records) + build_record_batch(type, records).to_table + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb new file mode 100644 index 000000000..6d7d4c079 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb @@ -0,0 +1,571 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module RawRecordsListArrayTests + def build_schema(type) + field_description = { + name: :element, + } + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :list, + field: field_description, + }, + } + end + + def test_null + records = [ + [[nil, nil, nil]], + [nil], + ] + target = build(:null, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [[true, nil, false]], + [nil], + ] + target = build(:boolean, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [[-(2 ** 7), nil, (2 ** 7) - 1]], + [nil], + ] + target = build(:int8, records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [[0, nil, (2 ** 8) - 1]], + [nil], + ] + target = build(:uint8, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [[-(2 ** 15), nil, (2 ** 15) - 1]], + [nil], + ] + target = build(:int16, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [[0, nil, (2 ** 16) - 1]], + [nil], + ] + target = build(:uint16, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [[-(2 ** 31), nil, (2 ** 31) - 1]], + [nil], + ] + target = build(:int32, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [[0, nil, (2 ** 32) - 1]], + [nil], + ] + target = build(:uint32, records) + assert_equal(records, target.raw_records) + 
end + + def test_int64 + records = [ + [[-(2 ** 63), nil, (2 ** 63) - 1]], + [nil], + ] + target = build(:int64, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [[0, nil, (2 ** 64) - 1]], + [nil], + ] + target = build(:uint64, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + target = build(:float, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + target = build(:double, records) + assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + [["\x00".b, nil, "\xff".b]], + [nil], + ] + target = build(:binary, records) + assert_equal(records, target.raw_records) + end + + def test_string + records = [ + [ + [ + "Ruby", + nil, + "\u3042", # U+3042 HIRAGANA LETTER A + ], + ], + [nil], + ] + target = build(:string, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [ + [ + Date.new(1960, 1, 1), + nil, + Date.new(2017, 8, 23), + ], + ], + [nil], + ] + target = build(:date32, records) + assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [ + [ + DateTime.new(1960, 1, 1, 2, 9, 30), + nil, + DateTime.new(2017, 8, 23, 14, 57, 2), + ], + ], + [nil], + ] + target = build(:date64, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30Z"), + nil, + Time.parse("2017-08-23T14:57:02Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123Z"), + nil, + Time.parse("2017-08-23T14:57:02.987Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + assert_equal(records, 
target.raw_records) + end + + def test_timestamp_micro + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456789Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654321Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + [ + [ + # 00:10:00 + Arrow::Time.new(unit, 60 * 10), + nil, + # 02:00:09 + Arrow::Time.new(unit, 60 * 60 * 2 + 9), + ], + ], + [nil], + ] + target = build({ + type: :time32, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [ + [ + # 00:10:00.123 + Arrow::Time.new(unit, (60 * 10) * 1000 + 123), + nil, + # 02:00:09.987 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), + ], + ], + [nil], + ] + target = build({ + type: :time32, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + [ + [ + # 00:10:00.123456 + Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), + nil, + # 02:00:09.987654 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654), + ], + ], + [nil], + ] + target = build({ + type: :time64, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + [ + [ + # 00:10:00.123456789 + Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), + nil, + # 02:00:09.987654321 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321), + ], + ], + [nil], + 
] + target = build({ + type: :time64, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_list + records = [ + [ + [ + [ + true, + nil, + ], + nil, + [ + nil, + false, + ], + ], + ], + [nil], + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_struct + records = [ + [ + [ + {"field" => true}, + nil, + {"field" => nil}, + ], + ], + [nil], + ] + target = build({ + type: :struct, + fields: [ + { + name: :field, + type: :boolean, + }, + ], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_map + records = [ + [ + [ + {"key1" => true, "key2" => nil}, + nil, + ], + ], + [nil], + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_sparse + omit("Need to add support for SparseUnionArrayBuilder") + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => nil}, + ], + ], + [nil], + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dense + omit("Need to add support for DenseUnionArrayBuilder") + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => nil}, + ], + ], + [nil], + ] + 
target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + records = [ + [ + [ + "Ruby", + nil, + "GLib", + ], + ], + [nil], + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + records) + assert_equal(records, target.raw_records) + end +end + +class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase + include RawRecordsListArrayTests + + def build(type, records) + Arrow::RecordBatch.new(build_schema(type), records) + end +end + +class RawRecordsTableListArrayTest < Test::Unit::TestCase + include RawRecordsListArrayTests + + def build(type, records) + Arrow::Table.new(build_schema(type), records) + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb new file mode 100644 index 000000000..c5abb7d77 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb @@ -0,0 +1,441 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases that build a single map column (string keys, items
# of the type under test) and check that #raw_records returns the same
# rows the target was built from.
#
# Including classes must provide build(type, records).
module RawRecordsMapArrayTests
  # Returns a schema description with one column named "column" whose
  # type is a map from string keys to items of the given type.
  def build_schema(type)
    {
      column: {
        type: :map,
        key: :string,
        item: type,
      },
    }
  end

  def test_null
    records = [
      [{"key1" => nil}],
      [nil],
    ]
    assert_raw_records(:null, records)
  end

  def test_boolean
    records = [
      [{"key1" => true, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:boolean, records)
  end

  def test_int8
    records = [
      [{"key1" => -(2 ** 7), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:int8, records)
  end

  def test_uint8
    records = [
      [{"key1" => (2 ** 8) - 1, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:uint8, records)
  end

  def test_int16
    records = [
      [{"key1" => -(2 ** 15), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:int16, records)
  end

  def test_uint16
    records = [
      [{"key1" => (2 ** 16) - 1, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:uint16, records)
  end

  def test_int32
    records = [
      [{"key1" => -(2 ** 31), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:int32, records)
  end

  def test_uint32
    records = [
      [{"key1" => (2 ** 32) - 1, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:uint32, records)
  end

  def test_int64
    records = [
      [{"key1" => -(2 ** 63), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:int64, records)
  end

  def test_uint64
    records = [
      [{"key1" => (2 ** 64) - 1, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:uint64, records)
  end

  def test_float
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:float, records)
  end

  def test_double
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:double, records)
  end

  def test_binary
    records = [
      [{"key1" => "\xff".b, "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:binary, records)
  end

  def test_string
    records = [
      [{"key1" => "Ruby", "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:string, records)
  end

  def test_date32
    records = [
      [{"key1" => Date.new(1960, 1, 1), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:date32, records)
  end

  def test_date64
    records = [
      [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}],
      [nil],
    ]
    assert_raw_records(:date64, records)
  end

  def test_timestamp_second
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :timestamp, unit: :second}, records)
  end

  def test_timestamp_milli
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :timestamp, unit: :milli}, records)
  end

  def test_timestamp_micro
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :timestamp, unit: :micro}, records)
  end

  def test_timestamp_nano
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :timestamp, unit: :nano}, records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :time32, unit: :second}, records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :time32, unit: :milli}, records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :time64, unit: :micro}, records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :time64, unit: :nano}, records)
  end

  def test_decimal128
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :decimal128, precision: 8, scale: 2}, records)
  end

  def test_decimal256
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    assert_raw_records({type: :decimal256, precision: 38, scale: 2}, records)
  end

  def test_list
    records = [
      [{"key1" => [true, nil, false], "key2" => nil}],
      [nil],
    ]
    item_type = {
      type: :list,
      field: {
        name: :element,
        type: :boolean,
      },
    }
    assert_raw_records(item_type, records)
  end

  def test_struct
    records = [
      [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}],
      [nil],
    ]
    item_type = {
      type: :struct,
      fields: [
        {
          name: :field,
          type: :boolean,
        },
      ],
    }
    assert_raw_records(item_type, records)
  end

  def test_map
    records = [
      [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}],
      [nil],
    ]
    item_type = {
      type: :map,
      key: :string,
      item: :boolean,
    }
    assert_raw_records(item_type, records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    # Each map item is a union value keyed by one of the declared union
    # field names (field1/field2), mirroring test_dense_union. The
    # previous literal nested "key2"/"key3" inside the "key1" value and
    # used an undeclared field name.
    records = [
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    item_type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(item_type, records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    item_type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(item_type, records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
      [nil],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    item_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_raw_records(item_type, records)
  end

  private

  # Builds the target through the including class's #build and asserts
  # that its raw_records equal the records it was built from.
  def assert_raw_records(type, records)
    target = build(type, records)
    assert_equal(records, target.raw_records)
  end
end

# Runs the shared map cases against Arrow::RecordBatch.
class RawRecordsRecordBatchMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  def build(type, records)
    Arrow::RecordBatch.new(build_schema(type), records)
  end
end

# Runs the shared map cases against Arrow::Table.
class RawRecordsTableMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  def build(type, records)
    Arrow::Table.new(build_schema(type), records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
# new file mode 100644
# index 000000000..50dff67ce
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
# @@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +module RawRecordsMultipleColumnsTests + def test_3_elements + records = [ + [true, nil, "Ruby"], + [nil, 0, "GLib"], + [false, 2 ** 8 - 1, nil], + ] + target = build([ + {name: :column0, type: :boolean}, + {name: :column1, type: :uint8}, + {name: :column2, type: :string}, + ], + records) + assert_equal(records, target.raw_records) + end + + def test_4_elements + records = [ + [true, nil, "Ruby", -(2 ** 63)], + [nil, 0, "GLib", nil], + [false, 2 ** 8 - 1, nil, (2 ** 63) - 1], + ] + target = build([ + {name: :column0, type: :boolean}, + {name: :column1, type: :uint8}, + {name: :column2, type: :string}, + {name: :column3, type: :int64}, + ], + records) + assert_equal(records, target.raw_records) + end +end + +class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase + include RawRecordsMultipleColumnsTests + + def build(schema, records) + Arrow::RecordBatch.new(schema, records) + end +end + +class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase + include RawRecordsMultipleColumnsTests + + def build(schema, records) + Arrow::Table.new(schema, records) + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb new file mode 100644 index 000000000..415401216 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb @@ -0,0 +1,484 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases that build a single sparse union column with two
# child fields named "0" and "1" (both of the type under test) and check
# that #raw_records returns the rows the target was built from.
#
# Including classes must provide build(type, records).
module RawRecordsSparseUnionArrayTests
  # Returns a schema description with one sparse union column whose two
  # children share the given type and are named "0" and "1".
  def build_schema(type, type_codes)
    field_description = type.is_a?(Hash) ? {}.merge(type) : {type: type}
    {
      column: {
        type: :sparse_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Assembles a record batch holding one sparse union column by hand:
  # one child array per union field plus an int8 array of type ids.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    union_data_type =
      Arrow::Schema.new(build_schema(type, type_codes)).fields[0].data_type

    # Each child array has one slot per record: the value stored under
    # that child's name, or nil when the record has no such key.
    children = union_data_type.fields.map do |field|
      child_records = records.map do |record|
        column = record[0]
        [column.nil? ? nil : column[field.name]]
      end
      child_batch = Arrow::RecordBatch.new(Arrow::Schema.new([field]),
                                           child_records)
      child_batch.columns[0].data
    end

    # A record selects child "0" or "1" by which key it carries; records
    # carrying neither key contribute no type id.
    type_ids = records.each_with_object([]) do |record, ids|
      column = record[0]
      if column.key?("0")
        ids << type_codes[0]
      elsif column.key?("1")
        ids << type_codes[1]
      end
    end

    union_array = Arrow::SparseUnionArray.new(union_data_type,
                                              Arrow::Int8Array.new(type_ids),
                                              children)
    Arrow::RecordBatch.new(Arrow::Schema.new(column: union_array.value_data_type),
                           records.size,
                           [union_array])
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    assert_raw_records(:null, records)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    assert_raw_records(:boolean, records)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:int8, records)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint8, records)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:int16, records)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint16, records)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:int32, records)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint32, records)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:int64, records)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    assert_raw_records(:uint64, records)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_raw_records(:float, records)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_raw_records(:double, records)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    assert_raw_records(:binary, records)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    assert_raw_records(:string, records)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:date32, records)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    assert_raw_records(:date64, records)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :second}, records)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :milli}, records)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :micro}, records)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :nano}, records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :time32, unit: :second}, records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :time32, unit: :milli}, records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :time64, unit: :micro}, records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :time64, unit: :nano}, records)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :decimal128, precision: 8, scale: 2}, records)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    assert_raw_records({type: :decimal256, precision: 38, scale: 2}, records)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    child_type = {
      type: :list,
      field: {
        name: :sub_element,
        type: :boolean,
      },
    }
    assert_raw_records(child_type, records)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    child_type = {
      type: :struct,
      fields: [
        {
          name: :sub_field,
          type: :boolean,
        },
      ],
    }
    assert_raw_records(child_type, records)
  end

  def test_map
    records = [
      [{"0" => {"key1" => true, "key2" => nil}}],
      [{"1" => nil}],
    ]
    child_type = {
      type: :map,
      key: :string,
      item: :boolean,
    }
    assert_raw_records(child_type, records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    child_type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(child_type, records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    child_type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(child_type, records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    child_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_raw_records(child_type, records)
  end

  private

  # Builds the target through the including class's #build and asserts
  # that its raw_records equal the records it was built from.
  def assert_raw_records(type, records)
    target = build(type, records)
    assert_equal(records, target.raw_records)
  end
end

# Runs the shared sparse union cases against a record batch.
class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  def build(type, records)
    build_record_batch(type, records)
  end
end

# Runs the shared sparse union cases against a table converted from the
# hand-built record batch.
class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  def build(type, records)
    build_record_batch(type, records).to_table
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb
# new file mode 100644
# index 000000000..6c01facf8
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb
# @@ -0,0 +1,485 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases that build a single struct column with one child
# named "field" (of the type under test) and check that #raw_records
# returns the rows the target was built from.
#
# Including classes must provide build(type, records).
module RawRecordsStructArrayTests
  # Returns a schema description with one struct column whose only child
  # field is named :field and has the given type.
  def build_schema(type)
    type_description = type.is_a?(Hash) ? type : {type: type}
    {
      column: {
        type: :struct,
        fields: [
          {name: :field}.merge(type_description),
        ],
      },
    }
  end

  def test_null
    records = [
      [{"field" => nil}],
      [nil],
    ]
    assert_raw_records(:null, records)
  end

  def test_boolean
    records = [
      [{"field" => true}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:boolean, records)
  end

  def test_int8
    records = [
      [{"field" => -(2 ** 7)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:int8, records)
  end

  def test_uint8
    records = [
      [{"field" => (2 ** 8) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:uint8, records)
  end

  def test_int16
    records = [
      [{"field" => -(2 ** 15)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:int16, records)
  end

  def test_uint16
    records = [
      [{"field" => (2 ** 16) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:uint16, records)
  end

  def test_int32
    records = [
      [{"field" => -(2 ** 31)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:int32, records)
  end

  def test_uint32
    records = [
      [{"field" => (2 ** 32) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:uint32, records)
  end

  def test_int64
    records = [
      [{"field" => -(2 ** 63)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:int64, records)
  end

  def test_uint64
    records = [
      [{"field" => (2 ** 64) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:uint64, records)
  end

  def test_float
    records = [
      [{"field" => -1.0}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:float, records)
  end

  def test_double
    records = [
      [{"field" => -1.0}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:double, records)
  end

  def test_binary
    records = [
      [{"field" => "\xff".b}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:binary, records)
  end

  def test_string
    records = [
      [{"field" => "Ruby"}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:string, records)
  end

  def test_date32
    records = [
      [{"field" => Date.new(1960, 1, 1)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:date32, records)
  end

  def test_date64
    records = [
      [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records(:date64, records)
  end

  def test_timestamp_second
    records = [
      [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :second}, records)
  end

  def test_timestamp_milli
    records = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :milli}, records)
  end

  def test_timestamp_micro
    records = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :micro}, records)
  end

  def test_timestamp_nano
    records = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :timestamp, unit: :nano}, records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"field" => Arrow::Time.new(unit, 60 * 10)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :time32, unit: :second}, records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"field" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :time32, unit: :milli}, records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :time64, unit: :micro}, records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :time64, unit: :nano}, records)
  end

  def test_decimal128
    records = [
      [{"field" => BigDecimal("92.92")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :decimal128, precision: 8, scale: 2}, records)
  end

  def test_decimal256
    records = [
      [{"field" => BigDecimal("92.92")}],
      [nil],
      [{"field" => nil}],
    ]
    assert_raw_records({type: :decimal256, precision: 38, scale: 2}, records)
  end

  def test_list
    records = [
      [{"field" => [true, nil, false]}],
      [nil],
      [{"field" => nil}],
    ]
    child_type = {
      type: :list,
      field: {
        name: :sub_element,
        type: :boolean,
      },
    }
    assert_raw_records(child_type, records)
  end

  def test_struct
    records = [
      [{"field" => {"sub_field" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"sub_field" => nil}}],
    ]
    child_type = {
      type: :struct,
      fields: [
        {
          name: :sub_field,
          type: :boolean,
        },
      ],
    }
    assert_raw_records(child_type, records)
  end

  def test_map
    records = [
      [{"field" => {"key1" => true, "key2" => nil}}],
      [nil],
      [{"field" => nil}],
    ]
    child_type = {
      type: :map,
      key: :string,
      item: :boolean,
    }
    assert_raw_records(child_type, records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"field" => {"field1" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"field2" => nil}}],
    ]
    child_type = {
      type: :sparse_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(child_type, records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"field" => {"field1" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"field2" => nil}}],
    ]
    child_type = {
      type: :dense_union,
      fields: [
        {
          name: :field1,
          type: :boolean,
        },
        {
          name: :field2,
          type: :uint8,
        },
      ],
      type_codes: [0, 1],
    }
    assert_raw_records(child_type, records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"field" => "Ruby"}],
      [nil],
      [{"field" => nil}],
      [{"field" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    child_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_raw_records(child_type, records)
  end

  private

  # Builds the target through the including class's #build and asserts
  # that its raw_records equal the records it was built from.
  def assert_raw_records(type, records)
    target = build(type, records)
    assert_equal(records, target.raw_records)
  end
end

# Runs the shared struct cases against Arrow::RecordBatch.
class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase
  include RawRecordsStructArrayTests

  def build(type, records)
    Arrow::RecordBatch.new(build_schema(type), records)
  end
end

# Runs the shared struct cases against Arrow::Table.
class RawRecordsTableStructArrayTest < Test::Unit::TestCase
  include RawRecordsStructArrayTests

  def build(type, records)
    Arrow::Table.new(build_schema(type), records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb
# new file mode 100644
# index 000000000..ae90217c2
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb
# @@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that a table built from two record batches returns the rows of
# both batches, in order, from #raw_records.
class RawRecordsTableTest < Test::Unit::TestCase
  test("2 arrays") do
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    raw_record_batches = [
      [
        [true, nil, "Ruby"],
        [nil, 0, "GLib"],
        [false, 2 ** 8 - 1, nil],
      ],
      [
        [nil, 10, "A"],
        [true, 20, "B"],
        [false, nil, "C"],
        [nil, 40, nil],
      ]
    ]
    # The expected rows are the batches' rows concatenated in order.
    expected_records = raw_record_batches.flatten(1)
    record_batches = raw_record_batches.map do |raw_records|
      Arrow::RecordBatch.new(schema, raw_records)
    end
    table = Arrow::Table.new(schema, record_batches)
    assert_equal(expected_records, table.raw_records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/run-test.rb b/src/arrow/ruby/red-arrow/test/run-test.rb
# new file mode 100755
# index 000000000..41ab73cb6
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/run-test.rb
# @@ -0,0 +1,71 @@
#!/usr/bin/env ruby
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Test runner script: optionally builds the C extension, puts the build
# and lib directories on the load path, loads the test helper and runs
# every test under the test directory.

$VERBOSE = true

require "fileutils"
require "pathname"

# Register each directory listed in ARROW_DLL_PATH as a DLL directory.
ENV.fetch("ARROW_DLL_PATH", "").split(File::PATH_SEPARATOR).each do |dll_dir|
  RubyInstaller::Runtime.add_dll_directory(dll_dir)
end

base_dir = Pathname.new(__dir__).parent.expand_path

lib_dir = base_dir + "lib"
ext_dir = base_dir + "ext" + "arrow"
test_dir = base_dir + "test"

# Build under $BUILD_DIR/red-arrow when BUILD_DIR is set; otherwise build
# in place inside the extension directory.
requested_build_dir = ENV["BUILD_DIR"]
if requested_build_dir
  build_dir = File.join(requested_build_dir, "red-arrow")
  FileUtils.mkdir_p(build_dir)
else
  build_dir = ext_dir
end

# Choose the make command: none when NO_MAKE=yes, then $MAKE, then gmake,
# then make. Stays nil when nothing is found.
make =
  if ENV["NO_MAKE"] == "yes"
    nil
  elsif ENV["MAKE"]
    ENV["MAKE"]
  elsif system("which gmake > #{File::NULL} 2>&1")
    "gmake"
  elsif system("which make > #{File::NULL} 2>&1")
    "make"
  end

if make
  Dir.chdir(build_dir.to_s) do
    # Generate the Makefile on first use.
    unless File.exist?("Makefile")
      configured = system(RbConfig.ruby,
                          (ext_dir + "extconf.rb").to_s,
                          "--enable-debug-build")
      exit(false) unless configured
    end
    exit(false) unless system("#{make} > #{File::NULL}")
  end
end

$LOAD_PATH.unshift(build_dir.to_s)
$LOAD_PATH.unshift(lib_dir.to_s)

require_relative "helper"

ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "10000"

exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))

# diff --git a/src/arrow/ruby/red-arrow/test/test-array-builder.rb b/src/arrow/ruby/red-arrow/test/test-array-builder.rb
# new file mode 100644
# index 000000000..318167d51
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-array-builder.rb
# @@ -0,0 +1,136 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the class-level .build of array builders.
class ArrayBuilderTest < Test::Unit::TestCase
  sub_test_case(".build") do
    # Builds an array from raw_array with the given builder class and
    # asserts that converting it back with #to_a yields raw_array.
    def assert_build(builder_class, raw_array)
      assert_equal(raw_array, builder_class.build(raw_array).to_a)
    end

    sub_test_case("generic builder") do
      test("strings") do
        assert_build(Arrow::ArrayBuilder, ["Hello", nil, "World"])
      end

      test("symbols") do
        # Symbols are compared against a string dictionary array rather
        # than round-tripped through #to_a.
        expected = Arrow::StringDictionaryArrayBuilder.new.build(["hello", nil, "world"])
        actual = Arrow::ArrayBuilder.build([:hello, nil, :world])
        assert_equal(expected, actual)
      end

      test("boolean") do
        assert_build(Arrow::ArrayBuilder, [true, nil, false])
      end

      test("positive integers") do
        assert_build(Arrow::ArrayBuilder, [1, nil, 2, nil, 3])
      end

      test("negative integers") do
        assert_build(Arrow::ArrayBuilder, [nil, -1, nil, -2, nil, -3])
      end

      test("times") do
        assert_build(Arrow::ArrayBuilder, [Time.at(0), Time.at(1), Time.at(2)])
      end

      test("dates") do
        assert_build(Arrow::ArrayBuilder, [Date.new(2018, 1, 4), Date.new(2018, 1, 5)])
      end

      test("datetimes") do
        raw_array = [
          DateTime.new(2018, 1, 4, 23, 18, 23),
          DateTime.new(2018, 1, 5, 0, 23, 21),
        ]
        assert_build(Arrow::ArrayBuilder, raw_array)
      end

      test("list<boolean>s") do
        raw_array = [
          [nil, true, false],
          nil,
          [false],
        ]
        assert_build(Arrow::ArrayBuilder, raw_array)
      end

      test("list<string>s") do
        raw_array = [
          ["Hello", "World"],
          ["Apache Arrow"],
        ]
        assert_build(Arrow::ArrayBuilder, raw_array)
      end
    end

    sub_test_case("specific builder") do
      # Test name => raw input; each entry becomes one Int32ArrayBuilder
      # round-trip test covering a different mix of values and nils.
      {
        "empty" => [],
        "values" => [1, -2],
        "values, nils" => [1, -2, nil, nil],
        "values, nils, values" => [1, -2, nil, nil, 3, -4],
        "values, nils, values, nils" => [1, -2, nil, nil, 3, -4, nil, nil],
        "nils" => [nil, nil],
        "nils, values" => [nil, nil, 3, -4],
        "nils, values, nil" => [nil, nil, 3, -4, nil, nil],
        "nils, values, nil, values" => [nil, nil, 3, -4, nil, nil, 5, -6],
      }.each do |description, raw_array|
        test(description) do
          assert_build(Arrow::Int32ArrayBuilder, raw_array)
        end
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-array.rb b/src/arrow/ruby/red-arrow/test/test-array.rb
# new file mode 100644
# index 000000000..2b7112da6
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-array.rb
# @@ -0,0 +1,325 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the Ruby-side conveniences on Arrow::Array: construction,
# element access, equality, casting, filter/take/is_in, and the
# concatenate / + / resolve family.
class ArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Boolean") do
      array = Arrow::BooleanArray.new([true, false, true])
      assert_equal([true, false, true],
                   array.to_a)
    end
  end

  sub_test_case("instance methods") do
    # Shared fixture: a boolean array that contains one nil.
    def setup
      @values = [true, false, nil, true]
      @array = Arrow::BooleanArray.new(@values)
    end

    test("#each") do
      assert_equal(@values, @array.to_a)
    end

    sub_test_case("#[]") do
      test("valid range") do
        assert_equal(@values,
                     @array.length.times.collect {|i| @array[i]})
      end

      # Indexing one past the end is expected to give nil, not raise.
      test("out of range") do
        assert_nil(@array[@array.length])
      end

      test("negative index") do
        assert_equal(@values.last,
                     @array[-1])
      end
    end

    sub_test_case("#==") do
      test("Arrow::Array") do
        assert do
          @array == @array
        end
      end

      # Comparing with a non-array object must be false rather than an error.
      test("not Arrow::Array") do
        assert do
          not (@array == 29)
        end
      end
    end

    sub_test_case("#equal_array?") do
      # Without options, two arrays holding NaN are asserted NOT equal.
      test("no options") do
        array1 = Arrow::FloatArray.new([1.1, Float::NAN])
        array2 = Arrow::FloatArray.new([1.1, Float::NAN])
        assert do
          not array1.equal_array?(array2)
        end
      end

      test("approx") do
        array1 = Arrow::FloatArray.new([1.1])
        array2 = Arrow::FloatArray.new([1.100001])
        assert do
          array1.equal_array?(array2, approx: true)
        end
      end

      test("nans-equal") do
        array1 = Arrow::FloatArray.new([1.1, Float::NAN])
        array2 = Arrow::FloatArray.new([1.1, Float::NAN])
        assert do
          array1.equal_array?(array2, nans_equal: true)
        end
      end

      test("absolute-tolerance") do
        array1 = Arrow::FloatArray.new([1.1])
        array2 = Arrow::FloatArray.new([1.101])
        assert do
          array1.equal_array?(array2, approx: true, absolute_tolerance: 0.01)
        end
      end
    end

    sub_test_case("#cast") do
      test("Symbol") do
        assert_equal(Arrow::Int32Array.new([1, 2, 3]),
                     Arrow::StringArray.new(["1", "2", "3"]).cast(:int32))
      end
    end
  end

  sub_test_case("#filter") do
    # All tests filter the same array with :emit_null selection, so a nil
    # in the filter is expected to show up as nil in the result.
    def setup
      values = [true, false, false, true]
      @array = Arrow::BooleanArray.new(values)
      @options = Arrow::FilterOptions.new
      @options.null_selection_behavior = :emit_null
    end

    test("Array: boolean") do
      filter = [nil, true, true, false]
      filtered_array = Arrow::BooleanArray.new([nil, false, false])
      assert_equal(filtered_array,
                   @array.filter(filter, @options))
    end

    test("Arrow::BooleanArray") do
      filter = Arrow::BooleanArray.new([nil, true, true, false])
      filtered_array = Arrow::BooleanArray.new([nil, false, false])
      assert_equal(filtered_array,
                   @array.filter(filter, @options))
    end

    test("Arrow::ChunkedArray") do
      chunks = [
        Arrow::BooleanArray.new([nil, true]),
        Arrow::BooleanArray.new([true, false]),
      ]
      filter = Arrow::ChunkedArray.new(chunks)
      filtered_array = Arrow::BooleanArray.new([nil, false, false])
      assert_equal(filtered_array,
                   @array.filter(filter, @options))
    end
  end

  sub_test_case("#take") do
    def setup
      values = [1, 0, 2]
      @array = Arrow::Int16Array.new(values)
    end

    # Indices given as a plain Ruby Array of integers.
    test("Array") do
      indices = [1, 0, 2]
      assert_equal(Arrow::Int16Array.new([0, 1, 2]),
                   @array.take(indices))
    end

    test("Arrow::Array") do
      indices = Arrow::Int16Array.new([1, 0, 2])
      assert_equal(Arrow::Int16Array.new([0, 1, 2]),
                   @array.take(indices))
    end

    # Chunked indices are expected to produce a chunked result.
    test("Arrow::ChunkedArray") do
      taken_chunks = [
        Arrow::Int16Array.new([0, 1]),
        Arrow::Int16Array.new([2])
      ]
      taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
      indices_chunks = [
        Arrow::Int16Array.new([1, 0]),
        Arrow::Int16Array.new([2])
      ]
      indices = Arrow::ChunkedArray.new(indices_chunks)
      assert_equal(taken_chunked_array,
                   @array.take(indices))
    end
  end

  sub_test_case("#is_in") do
    def setup
      values = [1, 0, 1, 2]
      @array = Arrow::Int16Array.new(values)
    end

    # Right-hand side given as a plain Ruby Array.
    test("Array") do
      right = [2, 0]
      assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
                   @array.is_in(right))
    end

    test("Arrow::Array") do
      right = Arrow::Int16Array.new([2, 0])
      assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
                   @array.is_in(right))
    end

    test("Arrow::ChunkedArray") do
      chunks = [
        Arrow::Int16Array.new([1, 4]),
        Arrow::Int16Array.new([0, 3])
      ]
      right = Arrow::ChunkedArray.new(chunks)
      assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
                   @array.is_in(right))
    end
  end

  sub_test_case("#concatenate") do
    test("Arrow::Array: same") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                   Arrow::Int32Array.new([1, 2, nil]).
                     concatenate(Arrow::Int32Array.new([4, 5]),
                                 Arrow::Int32Array.new([6])))
    end

    # Arguments of other integer types are expected to end up in an
    # Int32 result.
    test("Arrow::Array: castable") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                   Arrow::Int32Array.new([1, 2, nil]).
                     concatenate(Arrow::Int8Array.new([4, 5]),
                                 Arrow::UInt32Array.new([6])))
    end

    test("Arrow::Array: non-castable") do
      assert_raise(Arrow::Error::Invalid) do
        Arrow::Int32Array.new([1, 2, nil]).
          concatenate(Arrow::StringArray.new(["X"]))
      end
    end

    test("Array") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, nil, 6]),
                   Arrow::Int32Array.new([1, 2, nil]).
                     concatenate([4, nil],
                                 [6]))
    end

    # A bare Integer is neither an Arrow array nor a Ruby Array.
    test("invalid") do
      message = "[array][resolve] can't build int32 array: 4"
      assert_raise(ArgumentError.new(message)) do
        Arrow::Int32Array.new([1, 2, nil]).
          concatenate(4)
      end
    end
  end

  sub_test_case("#+") do
    test("Arrow::Array: same") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                   Arrow::Int32Array.new([1, 2, nil]) +
                   Arrow::Int32Array.new([4, 5, 6]))
    end

    test("Arrow::Array: castable") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, 5, 6]),
                   Arrow::Int32Array.new([1, 2, nil]) +
                   Arrow::Int8Array.new([4, 5, 6]))
    end

    test("Arrow::Array: non-castable") do
      assert_raise(Arrow::Error::Invalid) do
        Arrow::Int32Array.new([1, 2, nil]) +
          Arrow::StringArray.new(["X"])
      end
    end

    test("Array") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil, 4, nil, 6]),
                   Arrow::Int32Array.new([1, 2, nil]) +
                   [4, nil, 6])
    end

    test("invalid") do
      message = "[array][resolve] can't build int32 array: 4"
      assert_raise(ArgumentError.new(message)) do
        Arrow::Int32Array.new([1, 2, nil]) + 4
      end
    end
  end

  sub_test_case("#resolve") do
    test("Arrow::Array: same") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                   Arrow::Int32Array.new([]).
                     resolve(Arrow::Int32Array.new([1, 2, nil])))
    end

    test("Arrow::Array: castable") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                   Arrow::Int32Array.new([]).
                     resolve(Arrow::Int8Array.new([1, 2, nil])))
    end

    # Calls #resolve directly, like every other test in this group, so
    # that the method under test is the one that has to raise.
    test("Arrow::Array: non-castable") do
      assert_raise(Arrow::Error::Invalid) do
        Arrow::Int32Array.new([]).
          resolve(Arrow::StringArray.new(["X"]))
      end
    end

    test("Array: non-parametric") do
      assert_equal(Arrow::Int32Array.new([1, 2, nil]),
                   Arrow::Int32Array.new([]).
                     resolve([1, 2, nil]))
    end

    # A list array's data type carries a parameter (the element field);
    # the resolved array is compared with one built from that same type.
    test("Array: parametric") do
      list_data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
      list_array = Arrow::ListArray.new(list_data_type, [])
      assert_equal(Arrow::ListArray.new(list_data_type,
                                        [
                                          [true, false],
                                          nil,
                                        ]),
                   list_array.resolve([
                                        [true, false],
                                        nil,
                                      ]))
    end

    test("invalid") do
      message = "[array][resolve] can't build int32 array: 4"
      assert_raise(ArgumentError.new(message)) do
        Arrow::Int32Array.new([]).resolve(4)
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-bigdecimal.rb b/src/arrow/ruby/red-arrow/test/test-bigdecimal.rb
# new file mode 100644
# index 000000000..424f12d39
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-bigdecimal.rb
# @@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks which Arrow decimal class BigDecimal#to_arrow produces for
# values around the 128-bit / 256-bit boundary used in these cases.
class BigDecimalTest < Test::Unit::TestCase
  sub_test_case("#to_arrow") do
    # Asserts that BigDecimal(literal).to_arrow equals an instance of
    # `decimal_class` built from the same literal.
    def assert_to_arrow(decimal_class, literal)
      expected = decimal_class.new(literal)
      assert_equal(expected, BigDecimal(literal).to_arrow)
    end

    def test_128_positive
      assert_to_arrow(Arrow::Decimal128, "0.1e38")
    end

    def test_128_negative
      assert_to_arrow(Arrow::Decimal128, "-0.1e38")
    end

    def test_256_positive
      assert_to_arrow(Arrow::Decimal256, "0.1e39")
    end

    def test_256_negative
      assert_to_arrow(Arrow::Decimal256, "-0.1e39")
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-binary-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/test/test-binary-dictionary-array-builder.rb
# new file mode 100644
# index 000000000..743dbae5e
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-binary-dictionary-array-builder.rb
# @@ -0,0 +1,103 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BinaryDictionaryArrayBuilder#append_values. Each test
# appends values, finishes the builder and checks both the dictionary
# and the indices of the resulting array.
class BinaryDictionaryArrayBuilderTest < Test::Unit::TestCase
  def setup
    @builder = Arrow::BinaryDictionaryArrayBuilder.new
  end

  # Finishes the builder and returns [dictionary values, index values]
  # as plain Ruby arrays.
  def finish_to_dictionary_and_indices
    finished = @builder.finish
    [
      finished.dictionary.to_a,
      finished.indices.to_a,
    ]
  end

  sub_test_case("#append_values") do
    test("[nil]") do
      @builder.append_values([nil])
      expected = [
        [],
        [nil],
      ]
      assert_equal(expected, finish_to_dictionary_and_indices)
    end

    # "\xff" is in the input; the expectation is the same bytes as a
    # binary (`.b`) string.
    test("[String]") do
      @builder.append_values(["he\xffllo"])
      expected = [
        ["he\xffllo".b],
        [0],
      ]
      assert_equal(expected, finish_to_dictionary_and_indices)
    end

    test("[Symbol]") do
      @builder.append_values([:hello])
      expected = [
        ["hello"],
        [0],
      ]
      assert_equal(expected, finish_to_dictionary_and_indices)
    end

    # :world and "world" are expected to share one dictionary entry.
    test("[nil, String, Symbol]") do
      appended = [
        nil,
        "He\xffllo",
        :world,
        "world",
      ]
      @builder.append_values(appended)
      expected = [
        ["He\xffllo".b, "world"],
        [nil, 0, 1, 1],
      ]
      assert_equal(expected, finish_to_dictionary_and_indices)
    end

    # The second argument flags which values are valid; the value flagged
    # false is expected as a nil index and absent from the dictionary.
    test("is_valids") do
      appended = [
        "He\xffllo",
        :world,
        :goodbye,
      ]
      is_valids = [
        true,
        false,
        true,
      ]
      @builder.append_values(appended, is_valids)
      expected = [
        ["He\xffllo".b, "goodbye"],
        [0, nil, 1],
      ]
      assert_equal(expected, finish_to_dictionary_and_indices)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-boolean-scalar.rb b/src/arrow/ruby/red-arrow/test/test-boolean-scalar.rb
# new file mode 100644
# index 000000000..1053d1716
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-boolean-scalar.rb
# @@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that Arrow::BooleanScalar#value returns the wrapped Ruby boolean.
class BooleanScalarTest < Test::Unit::TestCase
  def setup
    @scalar = Arrow::BooleanScalar.new(true)
  end

  test("#value") do
    unwrapped = @scalar.value
    assert_equal(true, unwrapped)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-buffer.rb b/src/arrow/ruby/red-arrow/test/test-buffer.rb
# new file mode 100644
# index 000000000..b47a1abba
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-buffer.rb
# @@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::Buffer: that the source string survives a GC run
# while a buffer built from it exists, and the behaviour of #==.
class BufferTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # The statement order matters here: the local reference to the string
    # is dropped before GC.start, and the string is then looked up again
    # by its object id. `_buffer` is never read; it only keeps the buffer
    # itself referenced for the duration of the test.
    test("GC") do
      data = "Hello"
      data_id = data.object_id
      _buffer = Arrow::Buffer.new(data)
      data = nil
      GC.start
      assert_equal("Hello", ObjectSpace._id2ref(data_id))
    end
  end

  sub_test_case("instance methods") do
    def setup
      @buffer = Arrow::Buffer.new("Hello")
    end

    sub_test_case("#==") do
      test("Arrow::Buffer") do
        assert { @buffer == @buffer }
      end

      # Comparing with a non-buffer object must be false, not an error.
      test("not Arrow::Buffer") do
        assert { !(@buffer == 29) }
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-chunked-array.rb b/src/arrow/ruby/red-arrow/test/test-chunked-array.rb
# new file mode 100644
# index 000000000..3785e9868
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-chunked-array.rb
# @@ -0,0 +1,183 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the Ruby-side conveniences on Arrow::ChunkedArray:
# enumeration, #pack, #==, #filter and #take.
class ChunkedArrayTest < Test::Unit::TestCase
  test("#each") do
    arrays = [
      Arrow::BooleanArray.new([true, false]),
      Arrow::BooleanArray.new([nil, true]),
    ]
    chunked_array = Arrow::ChunkedArray.new(arrays)
    assert_equal([true, false, nil, true],
                 chunked_array.to_a)
  end

  sub_test_case("#pack") do
    # Both the class and the values of the packed result are checked.
    test("basic array") do
      arrays = [
        Arrow::BooleanArray.new([true, false]),
        Arrow::BooleanArray.new([nil, true]),
      ]
      chunked_array = Arrow::ChunkedArray.new(arrays)
      packed_chunked_array = chunked_array.pack
      assert_equal([
                     Arrow::BooleanArray,
                     [true, false, nil, true],
                   ],
                   [
                     packed_chunked_array.class,
                     packed_chunked_array.to_a,
                   ])
    end

    # Same check for an array whose data type takes a parameter
    # (the timestamp unit).
    test("TimestampArray") do
      type = Arrow::TimestampDataType.new(:nano)
      arrays = [
        Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
        Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
      ]
      chunked_array = Arrow::ChunkedArray.new(arrays)
      packed_chunked_array = chunked_array.pack
      assert_equal([
                     Arrow::TimestampArray,
                     [Time.at(0), Time.at(1)],
                   ],
                   [
                     packed_chunked_array.class,
                     packed_chunked_array.to_a,
                   ])
    end
  end

  sub_test_case("#==") do
    def setup
      arrays = [
        Arrow::BooleanArray.new([true]),
        Arrow::BooleanArray.new([false, true]),
      ]
      @chunked_array = Arrow::ChunkedArray.new(arrays)
    end

    test("Arrow::ChunkedArray") do
      assert do
        @chunked_array == @chunked_array
      end
    end

    # Comparing with a non-chunked-array object must be false, not an error.
    test("not Arrow::ChunkedArray") do
      assert do
        not (@chunked_array == 29)
      end
    end
  end

  sub_test_case("#filter") do
    # All tests filter the same chunked array with :emit_null selection,
    # so a nil in the filter is expected to show up as nil in the result.
    def setup
      arrays = [
        Arrow::BooleanArray.new([false, true]),
        Arrow::BooleanArray.new([false, true, false]),
      ]
      @chunked_array = Arrow::ChunkedArray.new(arrays)
      @options = Arrow::FilterOptions.new
      @options.null_selection_behavior = :emit_null
    end

    test("Array: boolean") do
      filter = [nil, true, true, false, true]
      chunks = [
        Arrow::BooleanArray.new([nil, true]),
        Arrow::BooleanArray.new([false, false]),
      ]
      filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
      assert_equal(filtered_chunked_array,
                   @chunked_array.filter(filter, @options))
    end

    test("Arrow::BooleanArray") do
      filter = Arrow::BooleanArray.new([nil, true, true, false, true])
      chunks = [
        Arrow::BooleanArray.new([nil, true]),
        Arrow::BooleanArray.new([false, false]),
      ]
      filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
      assert_equal(filtered_chunked_array,
                   @chunked_array.filter(filter, @options))
    end

    test("Arrow::ChunkedArray") do
      chunks = [
        Arrow::BooleanArray.new([nil, true]),
        Arrow::BooleanArray.new([true, false, true]),
      ]
      filter = Arrow::ChunkedArray.new(chunks)
      filtered_chunks = [
        Arrow::BooleanArray.new([nil, true]),
        Arrow::BooleanArray.new([false, false]),
      ]
      filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
      assert_equal(filtered_chunked_array,
                   @chunked_array.filter(filter, @options))
    end
  end

  sub_test_case("#take") do
    def setup
      chunks = [
        Arrow::Int16Array.new([1, 0]),
        Arrow::Int16Array.new([2]),
      ]
      @chunked_array = Arrow::ChunkedArray.new(chunks)
    end

    # Indices given as a plain Ruby Array of integers.
    test("Array") do
      chunks = [
        Arrow::Int16Array.new([0, 1]),
        Arrow::Int16Array.new([2])
      ]
      taken_chunked_array = Arrow::ChunkedArray.new(chunks)
      indices = [1, 0, 2]
      assert_equal(taken_chunked_array,
                   @chunked_array.take(indices))
    end

    test("Arrow::Array") do
      chunks = [
        Arrow::Int16Array.new([0, 1]),
        Arrow::Int16Array.new([2])
      ]
      taken_chunked_array = Arrow::ChunkedArray.new(chunks)
      indices = Arrow::Int16Array.new([1, 0, 2])
      assert_equal(taken_chunked_array,
                   @chunked_array.take(indices))
    end

    test("Arrow::ChunkedArray") do
      taken_chunks = [
        Arrow::Int16Array.new([0, 1]),
        Arrow::Int16Array.new([2])
      ]
      taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
      indices_chunks = [
        Arrow::Int16Array.new([1, 0]),
        Arrow::Int16Array.new([2])
      ]
      indices = Arrow::ChunkedArray.new(indices_chunks)
      assert_equal(taken_chunked_array,
                   @chunked_array.take(indices))
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-column.rb b/src/arrow/ruby/red-arrow/test/test-column.rb
# new file mode 100644
# index 000000000..613b01ccc
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-column.rb
# @@ -0,0 +1,92 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for the column object returned by a table's column accessor
# (`table.visible`): metadata, null checks, enumeration, counts and #==.
class ColumnTest < Test::Unit::TestCase
  # Shared fixture: a boolean column named "visible" with a nil in the
  # middle.
  def setup
    @column = Arrow::Table.new("visible" => [true, nil, false]).visible
  end

  test("#name") do
    assert_equal("visible", @column.name)
  end

  test("#data_type") do
    expected_type = Arrow::BooleanDataType.new
    assert_equal(expected_type, @column.data_type)
  end

  # Index 1 holds the nil of the fixture.
  test("#null?") do
    assert { @column.null?(1) }
  end

  test("#valid?") do
    assert { @column.valid?(0) }
  end

  test("#each") do
    enumerated = @column.each.to_a
    assert_equal([true, nil, false], enumerated)
  end

  test("#reverse_each") do
    enumerated = @column.reverse_each.to_a
    assert_equal([false, nil, true], enumerated)
  end

  test("#n_rows") do
    assert_equal(3, @column.n_rows)
  end

  test("#n_nulls") do
    assert_equal(1, @column.n_nulls)
  end

  sub_test_case("#==") do
    test("same value") do
      left = Arrow::Table.new("visible" => [true, false])
      right = Arrow::Table.new("visible" => [true, false])
      assert { left.visible == right.visible }
    end

    # Same values under another column name are expected to differ.
    test("different name") do
      left = Arrow::Table.new("visible" => [true, false])
      right = Arrow::Table.new("invisible" => [true, false])
      assert { !(left.visible == right.invisible) }
    end

    test("different value") do
      left = Arrow::Table.new("visible" => [true, false])
      right = Arrow::Table.new("visible" => [true, true])
      assert { !(left.visible == right.visible) }
    end

    # Comparing with a non-column object must be false, not an error.
    test("not Arrow::Column") do
      table = Arrow::Table.new("visible" => [true, false])
      assert { !(table.visible == 29) }
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-csv-loader.rb b/src/arrow/ruby/red-arrow/test/test-csv-loader.rb
# new file mode 100644
# index 000000000..7f7f23498
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-csv-loader.rb
# @@ -0,0 +1,250 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class CSVLoaderTest < Test::Unit::TestCase + include Helper::Fixture + + def load_csv(input) + Arrow::CSVLoader.load(input, skip_lines: /^#/) + end + + sub_test_case(".load") do + test("String: data: with header") do + data = fixture_path("with-header-float.csv").read + assert_equal(<<-TABLE, load_csv(data).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: data: without header") do + data = fixture_path("without-header-float.csv").read + assert_equal(<<-TABLE, load_csv(data).to_s) + 0 1 +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: path: with header") do + path = fixture_path("with-header-float.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: path: without header") do + path = fixture_path("without-header-float.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + 0 1 +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("Pathname: with header") do + path = fixture_path("with-header-float.csv") + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("Pathname: without header") do + path = fixture_path("without-header-float.csv") + assert_equal(<<-TABLE, load_csv(path).to_s) + 0 1 +0 
alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("null: with double quote") do + path = fixture_path("null-with-double-quote.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10 +1 bob (null) +2 chris -1 + TABLE + end + + test("null: without double quote") do + path = fixture_path("null-without-double-quote.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10 +1 bob (null) +2 chris -1 + TABLE + end + + test("number: float, integer") do + path = fixture_path("float-integer.csv").to_s + assert_equal([2.9, 10, -1.1], + load_csv(path)[:score].to_a) + end + + test("number: integer, float") do + path = fixture_path("integer-float.csv").to_s + assert_equal([10.0, 2.9, -1.1], + load_csv(path)[:score].to_a) + end + end + + sub_test_case("CSVReader") do + def load_csv(data, **options) + Arrow::CSVLoader.load(data, **options) + end + + sub_test_case(":headers") do + test("true") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: true)) +value +a +b +c + CSV + end + + test(":first_line") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: :first_line)) +value +a +b +c + CSV + end + + test("truthy") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: 0)) +value +a +b +c + CSV + end + + test("Array of column names") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: ["column"])) +a +b +c + CSV + end + + test("false") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + load_csv(<<-CSV, headers: false)) +a +b +c + CSV + end + + test("nil") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + 
load_csv(<<-CSV, headers: nil)) +a +b +c + CSV + end + + test("string") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: "column")) +a +b +c + CSV + end + end + + test(":column_types") do + assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])), + load_csv(<<-CSV, column_types: {count: :uint16})) +count +1 +2 +4 + CSV + end + + test(":schema") do + table = Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])) + assert_equal(table, + load_csv(<<-CSV, schema: table.schema)) +count +1 +2 +4 + CSV + end + + test(":encoding") do + messages = [ + "\u3042", # U+3042 HIRAGANA LETTER A + "\u3044", # U+3044 HIRAGANA LETTER I + "\u3046", # U+3046 HIRAGANA LETTER U + ] + table = Arrow::Table.new(:message => Arrow::StringArray.new(messages)) + encoding = "cp932" + assert_equal(table, + load_csv((["message"] + messages).join("\n").encode(encoding), + schema: table.schema, + encoding: encoding)) + end + + test(":encoding and :compression") do + messages = [ + "\u3042", # U+3042 HIRAGANA LETTER A + "\u3044", # U+3044 HIRAGANA LETTER I + "\u3046", # U+3046 HIRAGANA LETTER U + ] + table = Arrow::Table.new(:message => Arrow::StringArray.new(messages)) + encoding = "cp932" + csv = (["message"] + messages).join("\n").encode(encoding) + assert_equal(table, + load_csv(Zlib::Deflate.deflate(csv), + schema: table.schema, + encoding: encoding, + compression: :gzip)) + end + end +end diff --git a/src/arrow/ruby/red-arrow/test/test-data-type.rb b/src/arrow/ruby/red-arrow/test/test-data-type.rb new file mode 100644 index 000000000..f54831780 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/test-data-type.rb @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::DataType.resolve with the different kinds of
# description it accepts, and for DataType#==.
class DataTypeTest < Test::Unit::TestCase
  sub_test_case(".resolve") do
    test("DataType") do
      resolved = Arrow::DataType.resolve(Arrow::BooleanDataType.new)
      assert_equal(Arrow::BooleanDataType.new, resolved)
    end

    test("String") do
      resolved = Arrow::DataType.resolve("boolean")
      assert_equal(Arrow::BooleanDataType.new, resolved)
    end

    test("Symbol") do
      resolved = Arrow::DataType.resolve(:boolean)
      assert_equal(Arrow::BooleanDataType.new, resolved)
    end

    # Array form: the type name followed by its constructor argument.
    test("Array") do
      field = Arrow::Field.new(:visible, :boolean)
      resolved = Arrow::DataType.resolve([:list, field])
      assert_equal(Arrow::ListDataType.new(field), resolved)
    end

    # Hash form: the type name under :type, the rest as named arguments.
    test("Hash") do
      field = Arrow::Field.new(:visible, :boolean)
      resolved = Arrow::DataType.resolve(type: :list, field: field)
      assert_equal(Arrow::ListDataType.new(field), resolved)
    end

    # A type name that contains underscores.
    test("_") do
      resolved = Arrow::DataType.resolve([:fixed_size_binary, 10])
      assert_equal(Arrow::FixedSizeBinaryDataType.new(10), resolved)
    end

    # An abstract type name is rejected with a message that lists the
    # concrete alternatives.
    test("abstract") do
      concrete_types = ["Arrow::DoubleDataType", "Arrow::FloatDataType"]
      message =
        "abstract type: <:floating_point>: " \
        "use one of not abstract type: [#{concrete_types.join(", ")}]"
      assert_raise(ArgumentError.new(message)) do
        Arrow::DataType.resolve(:floating_point)
      end
    end
  end

  sub_test_case("instance methods") do
    def setup
      @data_type = Arrow::StringDataType.new
    end

    sub_test_case("#==") do
      test("Arrow::DataType") do
        assert { @data_type == @data_type }
      end

      # Comparing with a non-data-type object must be false, not an error.
      test("not Arrow::DataType") do
        assert { !(@data_type == 29) }
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-date32-array.rb b/src/arrow/ruby/red-arrow/test/test-date32-array.rb
# new file mode 100644
# index 000000000..6918b48db
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-date32-array.rb
# @@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that Arrow::Date32Array#[] converts a stored day count to a Date.
class Date32ArrayTest < Test::Unit::TestCase
  test("#[]") do
    n_days_since_epoch = 17406 # 2017-08-28
    expected_date = Date.new(2017, 8, 28)
    dates = Arrow::Date32Array.new([n_days_since_epoch])
    assert_equal(expected_date, dates[0])
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-date64-array.rb b/src/arrow/ruby/red-arrow/test/test-date64-array.rb
# new file mode 100644
# index 000000000..ec1c6db7c
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-date64-array.rb
# @@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# Checks that an element read from a Date64 array comes back as a DateTime.
class Date64ArrayTest < Test::Unit::TestCase
  test("#[]") do
    # 1503878400000 milliseconds since the epoch is 2017-08-28T00:00:00Z.
    date_array = Arrow::Date64Array.new([1503878400000])
    expected_time = DateTime.new(2017, 8, 28, 0, 0, 0)
    assert_equal(expected_time, date_array[0])
  end
end
# Covers the value types accepted by Arrow::Decimal128ArrayBuilder, both for
# single appends and for bulk appends.
class Decimal128ArrayBuilderTest < Test::Unit::TestCase
  def setup
    @data_type = Arrow::Decimal128DataType.new(3, 1)
    @builder = Arrow::Decimal128ArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      built = @builder.finish
      assert_equal(nil, built[0])
    end

    test("Arrow::Decimal128") do
      @builder.append_value(Arrow::Decimal128.new("10.1"))
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("String") do
      @builder.append_value("10.1")
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("Float") do
      @builder.append_value(10.1)
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("BigDecimal") do
      @builder.append_value(BigDecimal("10.1"))
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end
  end

  sub_test_case("#append_values") do
    test("mixed") do
      inputs = [
        Arrow::Decimal128.new("10.1"),
        nil,
        "10.1",
        10.1,
        BigDecimal("10.1"),
      ]
      @builder.append_values(inputs)
      built = @builder.finish
      expected = [
        BigDecimal("10.1"),
        nil,
        BigDecimal("10.1"),
        BigDecimal("10.1"),
        BigDecimal("10.1"),
      ]
      assert_equal(expected, built.to_a)
    end

    test("is_valids") do
      inputs = Array.new(3) { Arrow::Decimal128.new("10.1") }
      # The second flag marks the middle value as null.
      validity = [true, false, true]
      @builder.append_values(inputs, validity)
      built = @builder.finish
      expected = [BigDecimal("10.1"), nil, BigDecimal("10.1")]
      assert_equal(expected, built.to_a)
    end

    test("packed") do
      # Three copies of the same value's bytes, concatenated into one String.
      packed_bytes = Arrow::Decimal128.new("10.1").to_bytes.to_s * 3
      @builder.append_values(packed_bytes, [true, false, true])
      built = @builder.finish
      expected = [BigDecimal("10.1"), nil, BigDecimal("10.1")]
      assert_equal(expected, built.to_a)
    end
  end
end
# Checks that Arrow::Decimal128Array.new builds an array from mixed Ruby
# values.
class Decimal128ArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("build") do
      decimal_type = Arrow::Decimal128DataType.new(3, 1)
      raw_values = [10.1, nil, "10.1", BigDecimal("10.1")]
      built = Arrow::Decimal128Array.new(decimal_type, raw_values)
      expected = [
        BigDecimal("10.1"),
        nil,
        BigDecimal("10.1"),
        BigDecimal("10.1"),
      ]
      assert_equal(expected, built.to_a)
    end
  end
end
# Checks both constructor forms of Arrow::Decimal128DataType through the
# string form of the resulting type.
class Decimal128DataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("ordered arguments") do
      data_type = Arrow::Decimal128DataType.new(8, 2)
      assert_equal("decimal128(8, 2)", data_type.to_s)
    end

    test("description") do
      data_type = Arrow::Decimal128DataType.new(precision: 8,
                                                scale: 2)
      assert_equal("decimal128(8, 2)", data_type.to_s)
    end
  end
end
# Covers comparison, string conversion and sign operations on
# Arrow::Decimal128.
class Decimal128Test < Test::Unit::TestCase
  sub_test_case("instance methods") do
    def setup
      @decimal128 = Arrow::Decimal128.new("10.1")
    end

    sub_test_case("#==") do
      test("Arrow::Decimal128") do
        assert { @decimal128 == @decimal128 }
      end

      test("not Arrow::Decimal128") do
        assert { !(@decimal128 == 10.1) }
      end
    end

    sub_test_case("#!=") do
      test("Arrow::Decimal128") do
        assert { !(@decimal128 != @decimal128) }
      end

      test("not Arrow::Decimal128") do
        assert { @decimal128 != 10.1 }
      end
    end

    sub_test_case("#to_s") do
      test("default") do
        # With no scale given, the digits are printed without a decimal point.
        text = @decimal128.to_s
        assert_equal("101", text)
      end

      test("scale") do
        text = @decimal128.to_s(1)
        assert_equal("10.1", text)
      end
    end

    test("#abs") do
      negative = Arrow::Decimal128.new("-10.1")
      expected = [
        Arrow::Decimal128.new("-10.1"),
        Arrow::Decimal128.new("10.1"),
      ]
      # The receiver is listed too, so an in-place change by #abs would fail.
      assert_equal(expected, [negative, negative.abs])
    end

    test("#abs!") do
      value = Arrow::Decimal128.new("-10.1")
      value.abs!
      assert_equal(Arrow::Decimal128.new("10.1"), value)
    end

    test("#negate") do
      negative = Arrow::Decimal128.new("-10.1")
      expected = [
        Arrow::Decimal128.new("-10.1"),
        Arrow::Decimal128.new("10.1"),
      ]
      # The receiver is listed too, so an in-place change by #negate would fail.
      assert_equal(expected, [negative, negative.negate])
    end

    test("#negate!") do
      value = Arrow::Decimal128.new("-10.1")
      value.negate!
      assert_equal(Arrow::Decimal128.new("10.1"), value)
    end
  end
end
# Covers the value types accepted by Arrow::Decimal256ArrayBuilder, both for
# single appends and for bulk appends.
class Decimal256ArrayBuilderTest < Test::Unit::TestCase
  def setup
    @data_type = Arrow::Decimal256DataType.new(3, 1)
    @builder = Arrow::Decimal256ArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      built = @builder.finish
      assert_equal(nil, built[0])
    end

    test("Arrow::Decimal256") do
      @builder.append_value(Arrow::Decimal256.new("10.1"))
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("String") do
      @builder.append_value("10.1")
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("Float") do
      @builder.append_value(10.1)
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end

    test("BigDecimal") do
      @builder.append_value(BigDecimal("10.1"))
      built = @builder.finish
      assert_equal(BigDecimal("10.1"), built[0])
    end
  end

  sub_test_case("#append_values") do
    test("mixed") do
      inputs = [
        Arrow::Decimal256.new("10.1"),
        nil,
        "10.1",
        10.1,
        BigDecimal("10.1"),
      ]
      @builder.append_values(inputs)
      built = @builder.finish
      expected = [
        BigDecimal("10.1"),
        nil,
        BigDecimal("10.1"),
        BigDecimal("10.1"),
        BigDecimal("10.1"),
      ]
      assert_equal(expected, built.to_a)
    end

    test("is_valids") do
      inputs = Array.new(3) { Arrow::Decimal256.new("10.1") }
      # The second flag marks the middle value as null.
      validity = [true, false, true]
      @builder.append_values(inputs, validity)
      built = @builder.finish
      expected = [BigDecimal("10.1"), nil, BigDecimal("10.1")]
      assert_equal(expected, built.to_a)
    end

    test("packed") do
      # Three copies of the same value's bytes, concatenated into one String.
      packed_bytes = Arrow::Decimal256.new("10.1").to_bytes.to_s * 3
      @builder.append_values(packed_bytes, [true, false, true])
      built = @builder.finish
      expected = [BigDecimal("10.1"), nil, BigDecimal("10.1")]
      assert_equal(expected, built.to_a)
    end
  end
end
# Checks that Arrow::Decimal256Array.new builds an array from mixed Ruby
# values.
class Decimal256ArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("build") do
      decimal_type = Arrow::Decimal256DataType.new(3, 1)
      raw_values = [10.1, nil, "10.1", BigDecimal("10.1")]
      built = Arrow::Decimal256Array.new(decimal_type, raw_values)
      expected = [
        BigDecimal("10.1"),
        nil,
        BigDecimal("10.1"),
        BigDecimal("10.1"),
      ]
      assert_equal(expected, built.to_a)
    end
  end
end
# Checks both constructor forms of Arrow::Decimal256DataType through the
# string form of the resulting type.
class Decimal256DataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("ordered arguments") do
      data_type = Arrow::Decimal256DataType.new(8, 2)
      assert_equal("decimal256(8, 2)", data_type.to_s)
    end

    test("description") do
      data_type = Arrow::Decimal256DataType.new(precision: 8,
                                                scale: 2)
      assert_equal("decimal256(8, 2)", data_type.to_s)
    end
  end
end
# Covers comparison, string conversion and sign operations on
# Arrow::Decimal256.
class Decimal256Test < Test::Unit::TestCase
  sub_test_case("instance methods") do
    def setup
      @decimal256 = Arrow::Decimal256.new("10.1")
    end

    sub_test_case("#==") do
      test("Arrow::Decimal256") do
        assert { @decimal256 == @decimal256 }
      end

      test("not Arrow::Decimal256") do
        assert { !(@decimal256 == 10.1) }
      end
    end

    sub_test_case("#!=") do
      test("Arrow::Decimal256") do
        assert { !(@decimal256 != @decimal256) }
      end

      test("not Arrow::Decimal256") do
        assert { @decimal256 != 10.1 }
      end
    end

    sub_test_case("#to_s") do
      test("default") do
        # With no scale given, the digits are printed without a decimal point.
        text = @decimal256.to_s
        assert_equal("101", text)
      end

      test("scale") do
        text = @decimal256.to_s(1)
        assert_equal("10.1", text)
      end
    end

    test("#abs") do
      negative = Arrow::Decimal256.new("-10.1")
      expected = [
        Arrow::Decimal256.new("-10.1"),
        Arrow::Decimal256.new("10.1"),
      ]
      # The receiver is listed too, so an in-place change by #abs would fail.
      assert_equal(expected, [negative, negative.abs])
    end

    test("#abs!") do
      value = Arrow::Decimal256.new("-10.1")
      value.abs!
      assert_equal(Arrow::Decimal256.new("10.1"), value)
    end

    test("#negate") do
      negative = Arrow::Decimal256.new("-10.1")
      expected = [
        Arrow::Decimal256.new("-10.1"),
        Arrow::Decimal256.new("10.1"),
      ]
      # The receiver is listed too, so an in-place change by #negate would fail.
      assert_equal(expected, [negative, negative.negate])
    end

    test("#negate!") do
      value = Arrow::Decimal256.new("-10.1")
      value.negate!
      assert_equal(Arrow::Decimal256.new("10.1"), value)
    end
  end
end
# Checks both constructor forms of Arrow::DenseUnionDataType through the
# string form of the resulting type.
class DenseUnionDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    def setup
      # One field is a ready-made Arrow::Field, the other a Hash description.
      visible_field = Arrow::Field.new("visible", :boolean)
      count_description = {name: "count", type: :int32}
      @fields = [visible_field, count_description]
    end

    test("ordered arguments") do
      data_type = Arrow::DenseUnionDataType.new(@fields, [2, 9])
      assert_equal("dense_union<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end

    test("description") do
      data_type = Arrow::DenseUnionDataType.new(fields: @fields,
                                                type_codes: [2, 9])
      assert_equal("dense_union<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end
  end
end
# Checks that a dictionary-encoded string array still yields the original
# values.
class DictionaryArrayTest < Test::Unit::TestCase
  sub_test_case("instance methods") do
    def setup
      @values = ["a", "b", "c", "b", "a"]
      @string_array = Arrow::StringArray.new(@values)
      @array = @string_array.dictionary_encode
    end

    test("#[]") do
      decoded = @array.to_a
      assert_equal(@values, decoded)
    end

    test("#get_value") do
      probed_indexes = [0, 3]
      expected = [@values[0], @values[3]]
      actual = [
        @array.get_value(probed_indexes[0]),
        @array.get_value(probed_indexes[1]),
      ]
      assert_equal(expected, actual)
    end
  end
end
# Checks both constructor forms of Arrow::DictionaryDataType through the
# string form of the resulting type.
class DictionaryDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    def setup
      @index_data_type = :int8
      @value_data_type = :string
      @ordered = true
    end

    test("ordered arguments") do
      data_type = Arrow::DictionaryDataType.new(@index_data_type,
                                                @value_data_type,
                                                @ordered)
      assert_equal("dictionary<values=string, indices=int8, ordered=1>",
                   data_type.to_s)
    end

    test("description") do
      data_type =
        Arrow::DictionaryDataType.new(index_data_type: @index_data_type,
                                      value_data_type: @value_data_type,
                                      ordered: @ordered)
      assert_equal("dictionary<values=string, indices=int8, ordered=1>",
                   data_type.to_s)
    end
  end
end
# Checks how Arrow::Expression.try_convert maps plain Ruby values onto
# expression objects.
class TestExpression < Test::Unit::TestCase
  sub_test_case(".try_convert") do
    test("Symbol") do
      converted = Arrow::Expression.try_convert(:visible)
      assert_equal(Arrow::FieldExpression.new("visible"), converted)
    end

    test("[String]") do
      converted = Arrow::Expression.try_convert(["func"])
      assert_equal(Arrow::CallExpression.new("func", []), converted)
    end

    test("[Symbol]") do
      converted = Arrow::Expression.try_convert([:func])
      assert_equal(Arrow::CallExpression.new("func", []), converted)
    end

    test("[String, String]") do
      converted = Arrow::Expression.try_convert(["func", "argument1"])
      assert_equal(Arrow::CallExpression.new("func", ["argument1"]),
                   converted)
    end
  end
end
# Saves a small table to a temporary .feather file and loads it back, with
# and without compression.
class FeatherTest < Test::Unit::TestCase
  include Helper::Fixture

  def setup
    @table = Arrow::Table.new({
      "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
      "is_critical" => Arrow::BooleanArray.new([false, true, false]),
    })

    @output = Tempfile.new(["red-arrow", ".feather"])
    # The test body runs inside the yield; the temporary file is removed
    # afterwards even if the body raises.
    begin
      yield(@output)
    ensure
      @output.close!
    end
  end

  def test_default
    assert_round_trip
  end

  def test_compression
    assert_round_trip(compression: :zstd)
  end

  private

  # Saves @table to the temporary file with the given save options, then
  # checks that loading the file gives back an equal table.
  def assert_round_trip(**save_options)
    @table.save(@output.path, **save_options)
    @output.close

    assert_equal(@table, Arrow::Table.load(@output.path))
  end
end
# Covers the argument forms accepted by Arrow::Field.new and equality on a
# field instance. Each test name gives the argument kinds it passes.
class FieldTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("String, Arrow::DataType") do
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", Arrow::BooleanDataType.new).to_s)
    end

    test("Symbol, Arrow::DataType") do
      assert_equal("visible: bool",
                   Arrow::Field.new(:visible, Arrow::BooleanDataType.new).to_s)
    end

    test("String, Symbol") do
      # The name must be a String here: this case previously passed the
      # Symbol :visible, so the String-name path was never exercised.
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", :boolean).to_s)
    end

    test("String, Hash") do
      # Same fix as above: a String name with a Hash type description.
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", type: :boolean).to_s)
    end

    test("description: String") do
      assert_equal("visible: bool",
                   Arrow::Field.new(name: "visible",
                                    data_type: :boolean).to_s)
    end

    test("description: Symbol") do
      assert_equal("visible: bool",
                   Arrow::Field.new(name: :visible,
                                    data_type: :boolean).to_s)
    end

    test("description: shortcut") do
      # :type is used in place of :data_type.
      assert_equal("visible: bool",
                   Arrow::Field.new(name: :visible,
                                    type: :boolean).to_s)
    end

    test("Hash: shortcut: additional") do
      # The extra :field key describes the list's element field.
      description = {
        name: :tags,
        type: :list,
        field: {
          name: "tag",
          type: :string,
        },
      }
      assert_equal("tags: list<tag: string>",
                   Arrow::Field.new(description).to_s)
    end
  end

  sub_test_case("instance methods") do
    def setup
      @field = Arrow::Field.new("count", :uint32)
    end

    sub_test_case("#==") do
      test("Arrow::Field") do
        assert do
          @field == @field
        end
      end

      test("not Arrow::Field") do
        assert do
          not (@field == 29)
        end
      end
    end
  end
end
# Checks that Arrow::FileOutputStream.open truncates by default and appends
# when asked to, using a file that already contains "Hello".
class TestFileOutputStream < Test::Unit::TestCase
  sub_test_case(".open") do
    def setup
      @file = Tempfile.open("arrow-file-output-stream")
      @file.write("Hello")
      @file.close
    end

    def test_default
      Arrow::FileOutputStream.open(@file.path) do |stream|
        stream.write(" World")
      end
      written = File.read(@file.path)
      assert_equal(" World", written)
    end

    def test_options_append
      Arrow::FileOutputStream.open(@file.path, append: true) do |stream|
        stream.write(" World")
      end
      written = File.read(@file.path)
      assert_equal("Hello World", written)
    end

    # The append flag given as a positional boolean.
    def test_append_true
      Arrow::FileOutputStream.open(@file.path, true) do |stream|
        stream.write(" World")
      end
      written = File.read(@file.path)
      assert_equal("Hello World", written)
    end

    def test_append_false
      Arrow::FileOutputStream.open(@file.path, false) do |stream|
        stream.write(" World")
      end
      written = File.read(@file.path)
      assert_equal(" World", written)
    end
  end
end
# Covers Arrow::FixedSizeBinaryArrayBuilder#append_value and #append_values
# for a 4-byte fixed size binary type.
class FixedSizeBinaryArrayBuilderTest < Test::Unit::TestCase
  def setup
    @data_type = Arrow::FixedSizeBinaryDataType.new(4)
    @builder = Arrow::FixedSizeBinaryArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      built = @builder.finish
      assert_equal(nil, built[0])
    end

    test("String") do
      @builder.append_value("0123")
      built = @builder.finish
      assert_equal("0123", built[0])
    end

    test("GLib::Bytes") do
      bytes = GLib::Bytes.new("0123")
      @builder.append_value(bytes)
      built = @builder.finish
      assert_equal("0123", built[0])
    end
  end

  sub_test_case("#append_values") do
    test("mixed") do
      inputs = ["0123", nil, GLib::Bytes.new("abcd")]
      @builder.append_values(inputs)
      built = @builder.finish
      assert_equal(["0123", nil, "abcd"], built.to_a)
    end

    test("is_valids") do
      # The second argument marks which entries are valid (non-null).
      inputs = ["0123", "0123", "0123"]
      validity = [true, false, true]
      @builder.append_values(inputs, validity)
      built = @builder.finish
      assert_equal(["0123", nil, "0123"], built.to_a)
    end

    test("packed") do
      # All values are given as one concatenated String.
      @builder.append_values("0123" * 3, [true, false, true])
      built = @builder.finish
      assert_equal(["0123", nil, "0123"], built.to_a)
    end
  end
end
# Covers building an Arrow::FixedSizeBinaryArray directly from Ruby values.
class FixedSizeBinaryArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("build") do
      four_bytes = Arrow::FixedSizeBinaryDataType.new(4)
      inputs = ["0123", nil, GLib::Bytes.new("abcd")]
      built = Arrow::FixedSizeBinaryArray.new(four_bytes, inputs)
      assert_equal(["0123", nil, "abcd"], built.to_a)
    end
  end
end
# Covers Arrow::FloatScalar#equal_scalar? with and without approximate
# comparison options.
class FloatScalarTest < Test::Unit::TestCase
  sub_test_case("#equal_scalar?") do
    test("no options") do
      base = Arrow::FloatScalar.new(1.1)
      nearby = Arrow::FloatScalar.new(1.1000001)
      assert { !base.equal_scalar?(nearby) }
    end

    test(":approx") do
      base = Arrow::FloatScalar.new(1.1)
      nearby = Arrow::FloatScalar.new(1.1000001)
      assert { base.equal_scalar?(nearby, approx: true) }
    end

    test(":absolute_tolerance") do
      base = Arrow::FloatScalar.new(1.1)
      farther = Arrow::FloatScalar.new(1.1001)
      comparison = {approx: true, absolute_tolerance: 0.001}
      assert { base.equal_scalar?(farther, **comparison) }
    end
  end
end
# Covers Arrow::Function#execute with the different Ruby and Arrow objects
# that may be passed as arguments.
class FunctionTest < Test::Unit::TestCase
  sub_test_case("#execute") do
    # Looks up the named compute function, executes it with the given
    # arguments (and optional options) and returns the result's value.
    def execute_function(name, *execute_arguments)
      Arrow::Function.find(name).execute(*execute_arguments).value
    end

    # Builds cast options that target the given data type.
    def cast_options_to(data_type)
      Arrow::CastOptions.new.tap do |options|
        options.to_data_type = data_type
      end
    end

    test("Arrow::Array") do
      operands = [
        Arrow::BooleanArray.new([true, false, false]),
        Arrow::BooleanArray.new([true, false, true]),
      ]
      assert_equal([true, false, true],
                   execute_function("or", operands).to_a)
    end

    test("Array") do
      operands = [
        [true, false, false],
        [true, false, true],
      ]
      assert_equal([true, false, true],
                   execute_function("or", operands).to_a)
    end

    test("Arrow::ChunkedArray") do
      left = Arrow::ChunkedArray.new([
                                       Arrow::BooleanArray.new([true]),
                                       Arrow::BooleanArray.new([false, false]),
                                     ])
      right = Arrow::ChunkedArray.new([
                                        Arrow::BooleanArray.new([true, false]),
                                        Arrow::BooleanArray.new([true]),
                                      ])
      assert_equal([true, false, true],
                   execute_function("or", [left, right]).to_a)
    end

    test("Arrow::Scalar") do
      operands = [
        Arrow::Int8Array.new([1, 2, 3]),
        Arrow::Int8Scalar.new(5),
      ]
      assert_equal([6, 7, 8],
                   execute_function("add", operands).to_a)
    end

    test("Integer") do
      operands = [[1, 2, 3], 5]
      assert_equal([6, 7, 8],
                   execute_function("add", operands).to_a)
    end

    test("Float") do
      operands = [[1, 2, 3], 5.1]
      assert_equal([6.1, 7.1, 8.1],
                   execute_function("add", operands).to_a)
    end

    test("true") do
      operands = [
        Arrow::BooleanArray.new([true, false, false]),
        true,
      ]
      assert_equal([true, false, false],
                   execute_function("and", operands).to_a)
    end

    test("false") do
      operands = [
        Arrow::BooleanArray.new([true, false, false]),
        false,
      ]
      assert_equal([true, false, false],
                   execute_function("or", operands).to_a)
    end

    test("String") do
      assert_equal("HELLO",
                   execute_function("ascii_upper", ["Hello"]).to_s)
    end

    test("Date") do
      day = Date.new(2021, 6, 12)
      options = cast_options_to(Arrow::TimestampDataType.new(:second))
      midnight = Time.utc(day.year, day.month, day.day)
      expected = Arrow::TimestampScalar.new(options.to_data_type,
                                            midnight.to_i)
      assert_equal(expected,
                   execute_function("cast", [day], options))
    end

    test("Arrow::Time: second") do
      ten_minutes_in_seconds = 60 * 10 # 00:10:00
      source = Arrow::Time.new(Arrow::TimeUnit::SECOND,
                               ten_minutes_in_seconds)
      options = cast_options_to(Arrow::Time64DataType.new(:micro))
      ten_minutes_in_micros = 60 * 10 * 1000 * 1000 # 00:10:00.000000
      expected = Arrow::Time64Scalar.new(options.to_data_type,
                                         ten_minutes_in_micros)
      assert_equal(expected,
                   execute_function("cast", [source], options))
    end

    test("Arrow::Time: micro") do
      ten_minutes_in_micros = 60 * 10 * 1000 * 1000 # 00:10:00.000000
      source = Arrow::Time.new(Arrow::TimeUnit::MICRO,
                               ten_minutes_in_micros)
      options = cast_options_to(Arrow::Time32DataType.new(:second))
      options.allow_time_truncate = true
      ten_minutes_in_seconds = 60 * 10 # 00:10:00
      expected = Arrow::Time32Scalar.new(options.to_data_type,
                                         ten_minutes_in_seconds)
      assert_equal(expected,
                   execute_function("cast", [source], options))
    end

    test("Time") do
      # The source carries one microsecond; the expected value is the same
      # instant with the sub-second part dropped.
      precise = Time.utc(2021, 6, 12, 1, 2, 3, 1)
      options = cast_options_to(Arrow::TimestampDataType.new(:second))
      options.allow_time_truncate = true
      truncated = Time.utc(precise.year,
                           precise.month,
                           precise.day,
                           precise.hour,
                           precise.min,
                           precise.sec)
      expected = Arrow::TimestampScalar.new(options.to_data_type,
                                            truncated.to_i)
      assert_equal(expected,
                   execute_function("cast", [precise], options))
    end
  end
end
# Covers Arrow::Table#group and the aggregations available on the returned
# group object (count, sum, mean, min, max, aggregate). Each test compares
# the aggregated table's #to_s output with an expected text table.
#
# NOTE(review): the expected tables below are whitespace-sensitive heredocs
# and the whitespace between columns appears to have been collapsed in this
# rendering -- restore the exact column separators/alignment from the
# original file before relying on these assertions.
class GroupTest < Test::Unit::TestCase
  include Helper::Fixture

  # Builds a six-row table with two grouping keys and four value columns;
  # every value column contains exactly one nil.
  def setup
    raw_table = {
      :group_key1 => Arrow::UInt8Array.new([1, 1, 2, 3, 3, 3]),
      :group_key2 => Arrow::UInt8Array.new([1, 1, 1, 1, 2, 2]),
      :int => Arrow::Int32Array.new([-1, -2, nil, -4, -5, -6]),
      :uint => Arrow::UInt32Array.new([1, nil, 3, 4, 5, 6]),
      :float => Arrow::FloatArray.new([nil, 2.2, 3.3, 4.4, 5.5, 6.6]),
      :string => Arrow::StringArray.new(["a", "b", "c", nil, "e", "f"]),
    }
    @table = Arrow::Table.new(raw_table)
  end

  sub_test_case("key") do
    # Grouping by a column built from Ruby Time values.
    test("Time") do
      time_values = [
        Time.parse("2018-01-29"),
        Time.parse("2018-01-30"),
      ]
      raw_table = {
        :time => Arrow::ArrayBuilder.build(time_values),
        :int => Arrow::Int32Array.new([-1, -2]),
      }
      table = Arrow::Table.new(raw_table)
      assert_equal(<<-TABLE, table.group(:time).count.to_s)
 count(int) time
0 1 #{time_values[0].iso8601}
1 1 #{time_values[1].iso8601}
      TABLE
    end
  end

  sub_test_case("#count") do
    test("single") do
      assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
 count(group_key2) count(int) count(uint) count(float) count(string) group_key1
0 2 2 1 1 2 1
1 1 0 1 1 1 2
2 3 3 3 3 2 3
      TABLE
    end

    test("multiple") do
      assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
 count(int) count(uint) count(float) count(string) group_key1 group_key2
0 2 1 1 2 1 1
1 0 1 1 1 2 1
2 1 1 1 0 3 1
3 2 2 2 2 3 2
      TABLE
    end

    # Counting only the named columns.
    test("column") do
      group = @table.group(:group_key1, :group_key2)
      assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
 count(int) count(uint) group_key1 group_key2
0 2 1 1 1
1 0 1 2 1
2 1 1 3 1
3 2 2 3 2
      TABLE
    end
  end

  sub_test_case("#sum") do
    test("single") do
      assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
 sum(group_key2) sum(int) sum(uint) sum(float) group_key1
0 2 -3 1 2.200000 1
1 1 (null) 3 3.300000 2
2 5 -15 15 16.500000 3
      TABLE
    end

    test("multiple") do
      assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
 sum(int) sum(uint) sum(float) group_key1 group_key2
0 -3 1 2.200000 1 1
1 (null) 3 3.300000 2 1
2 -4 4 4.400000 3 1
3 -11 11 12.100000 3 2
      TABLE
    end
  end

  sub_test_case("#mean") do
    test("single") do
      assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
 mean(group_key2) mean(int) mean(uint) mean(float) group_key1
0 1.000000 -1.500000 1.000000 2.200000 1
1 1.000000 (null) 3.000000 3.300000 2
2 1.666667 -5.000000 5.000000 5.500000 3
      TABLE
    end

    test("multiple") do
      assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
 mean(int) mean(uint) mean(float) group_key1 group_key2
0 -1.500000 1.000000 2.200000 1 1
1 (null) 3.000000 3.300000 2 1
2 -4.000000 4.000000 4.400000 3 1
3 -5.500000 5.500000 6.050000 3 2
      TABLE
    end
  end

  sub_test_case("#min") do
    test("single") do
      assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
 min(group_key2) min(int) min(uint) min(float) group_key1
0 1 -2 1 2.200000 1
1 1 (null) 3 3.300000 2
2 1 -6 4 4.400000 3
      TABLE
    end

    test("multiple") do
      assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
 min(int) min(uint) min(float) group_key1 group_key2
0 -2 1 2.200000 1 1
1 (null) 3 3.300000 2 1
2 -4 4 4.400000 3 1
3 -6 5 5.500000 3 2
      TABLE
    end
  end

  sub_test_case("#max") do
    test("single") do
      assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
 max(group_key2) max(int) max(uint) max(float) group_key1
0 1 -1 1 2.200000 1
1 1 (null) 3 3.300000 2
2 2 -4 6 6.600000 3
      TABLE
    end

    test("multiple") do
      assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
 max(int) max(uint) max(float) group_key1 group_key2
0 -1 1 2.200000 1 1
1 (null) 3 3.300000 2 1
2 -4 4 4.400000 3 1
3 -5 6 6.600000 3 2
      TABLE
    end
  end

  sub_test_case("#aggregate") do
    # Aggregations are named with "function(column)" strings.
    test("function()") do
      group = @table.group(:group_key1, :group_key2)
      assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
 count(int) sum(uint) group_key1 group_key2
0 2 1 1 1
1 0 3 2 1
2 1 4 3 1
3 2 11 3 2
      TABLE
    end
  end
end

# Covers Arrow::ListArrayBuilder for a list-of-boolean data type whose item
# field is named "visible".
class ListArrayBuilderTest < Test::Unit::TestCase
  def setup
    @data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
    @builder = Arrow::ListArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      array = @builder.finish
      assert_equal(nil, array[0])
    end

    test("Array") do
      @builder.append_value([true, false, true])
      array = @builder.finish
      assert_equal([true, false, true], array[0].to_a)
    end
  end

  sub_test_case("#append_values") do
    test("[nil, Array]") do
      @builder.append_values([[false], nil, [true, false, true]])
      array = @builder.finish
      # Each list is converted with #to_a for comparison; nil entries are
      # kept as nil.
      assert_equal([
                     [false],
                     nil,
                     [true, false, true],
                   ],
                   array.collect {|list| list ? list.to_a : nil})
    end

    test("is_valids") do
      # The second argument flags the middle entry as invalid, so it is
      # expected back as nil even though a value was supplied for it.
      @builder.append_values([[false], [true, true], [true, false, true]],
                             [true, false, true])
      array = @builder.finish
      assert_equal([
                     [false],
                     nil,
                     [true, false, true],
                   ],
                   array.collect {|list| list ? list.to_a : nil})
    end
  end

  sub_test_case("#append") do
    # #append without arguments starts a new list; items are then added
    # through #value_builder.
    test("backward compatibility") do
      @builder.append
      @builder.value_builder.append(true)
      @builder.value_builder.append(false)
      @builder.append
      @builder.value_builder.append(true)
      array = @builder.finish

      assert_equal([
                     [true, false],
                     [true],
                   ],
                   array.collect(&:to_a))
    end
  end
end
# Covers building an Arrow::ListArray directly from nested Ruby arrays.
class ListArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("build") do
      boolean_list = Arrow::ListDataType.new(name: "visible", type: :boolean)
      rows = [
        [true, false],
        nil,
        [false, true, false],
      ]
      built = Arrow::ListArray.new(boolean_list, rows)
      # Lists are converted with #to_a for comparison; nil stays nil.
      round_tripped = built.collect do |row|
        row ? row.to_a : nil
      end
      assert_equal(rows, round_tripped)
    end
  end
end
# Covers the argument forms accepted by Arrow::ListDataType.new. Each test
# checks the resulting type through its #to_s representation.
class ListDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Arrow::Field") do
      tag_field = Arrow::Field.new(:tag, :string)
      list_type = Arrow::ListDataType.new(tag_field)
      assert_equal("list<tag: string>", list_type.to_s)
    end

    test("name: String") do
      list_type = Arrow::ListDataType.new(name: "tag", type: :string)
      assert_equal("list<tag: string>", list_type.to_s)
    end

    test("field: Arrow::Field") do
      tag_field = Arrow::Field.new(:tag, :string)
      list_type = Arrow::ListDataType.new(field: tag_field)
      assert_equal("list<tag: string>", list_type.to_s)
    end

    test("field: Hash") do
      tag_description = {name: "tag", type: :string}
      list_type = Arrow::ListDataType.new(field: tag_description)
      assert_equal("list<tag: string>", list_type.to_s)
    end

    test("Arrow::DataType") do
      boolean_type = Arrow::BooleanDataType.new
      list_type = Arrow::ListDataType.new(boolean_type)
      assert_equal("list<item: bool>", list_type.to_s)
    end

    test("String") do
      list_type = Arrow::ListDataType.new("boolean")
      assert_equal("list<item: bool>", list_type.to_s)
    end

    test("Symbol") do
      list_type = Arrow::ListDataType.new(:boolean)
      assert_equal("list<item: bool>", list_type.to_s)
    end

    test("[data type name, additional information]") do
      list_type = Arrow::ListDataType.new([:time32, :milli])
      assert_equal("list<item: time32[ms]>", list_type.to_s)
    end

    test("type: Symbol") do
      list_type = Arrow::ListDataType.new(type: :boolean)
      assert_equal("list<item: bool>", list_type.to_s)
    end
  end
end
# Covers Arrow::MapArrayBuilder#append_value and #append_values for a
# string -> int16 map type.
class MapArrayBuilderTest < Test::Unit::TestCase
  def setup
    string_keys = Arrow::StringDataType.new
    int16_items = Arrow::Int16DataType.new
    map_type = Arrow::MapDataType.new(string_keys, int16_items)
    @builder = Arrow::MapArrayBuilder.new(map_type)
  end

  # Finishes the builder and returns the built array's elements as yielded
  # by its iteration.
  def finished_values
    @builder.finish.collect {|entry| entry}
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      assert_equal([nil], finished_values)
    end

    test("Hash") do
      @builder.append_value({"a" => 0, "b" => 1})
      @builder.append_value({"c" => 0, "d" => 1})
      expected = [
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1}
      ]
      assert_equal(expected, finished_values)
    end

    test("#each") do
      # Key/value pairs given as nested arrays instead of a Hash.
      @builder.append_value([["a", 0], ["b", 1]])
      @builder.append_value([["c", 0], ["d", 1]])
      expected = [
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1}
      ]
      assert_equal(expected, finished_values)
    end
  end

  sub_test_case("#append_values") do
    test("[nil]") do
      @builder.append_values([nil])
      assert_equal([nil], finished_values)
    end

    test("[Hash]") do
      @builder.append_values([{"a" => 0, "b" => 1}, {"c" => 0, "d" => 1}])
      expected = [
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1}
      ]
      assert_equal(expected, finished_values)
    end

    test("[#each]") do
      @builder.append_values([[["a", 0], ["b", 1]], [["c", 0], ["d", 1]]])
      expected = [
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1}
      ]
      assert_equal(expected, finished_values)
    end

    test("[nil, Hash, #each]") do
      @builder.append_values([nil, {"a" => 0, "b" => 1}, [["c", 0], ["d", 1]]])
      expected = [
        nil,
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1}
      ]
      assert_equal(expected, finished_values)
    end

    test("is_valids") do
      # The second argument flags the middle entry as invalid, so it is
      # expected back as nil.
      inputs = [
        {"a" => 0, "b" => 1},
        {"c" => 0, "d" => 1},
        {"e" => 0, "f" => 1}
      ]
      @builder.append_values(inputs, [true, false, true])
      expected = [
        {"a" => 0, "b" => 1},
        nil,
        {"e" => 0, "f" => 1}
      ]
      assert_equal(expected, finished_values)
    end
  end
end
# Covers building an Arrow::MapArray directly from Ruby hashes.
class MapArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("build") do
      string_keys = Arrow::StringDataType.new
      int16_items = Arrow::Int16DataType.new
      map_type = Arrow::MapDataType.new(string_keys, int16_items)
      rows = [
        {"a" => 0, "b" => 1},
        nil,
        {"c" => 0, "d" => 1}
      ]
      built = Arrow::MapArray.new(map_type, rows)
      round_tripped = built.collect {|row| row}
      assert_equal(rows, round_tripped)
    end
  end
end
# Covers the argument forms accepted by Arrow::MapDataType.new.
class MapDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    def setup
      @key, @item = :int8, :string
    end

    test("ordered arguments") do
      map_type = Arrow::MapDataType.new(@key, @item)
      assert_equal("map<int8, string>", map_type.to_s)
    end

    test("description") do
      map_type = Arrow::MapDataType.new(key: @key, item: @item)
      assert_equal("map<int8, string>", map_type.to_s)
    end
  end
end

# Covers exporting Arrow arrays and buffers through Fiddle::MemoryView.
# Each test compares the exported view's format, item size, byte size and
# raw bytes with expected values.
class MemoryViewTest < Test::Unit::TestCase
  # Omits every test when Fiddle::MemoryView.export is unavailable.
  def setup
    omit("Fiddle::MemoryView is needed") unless Fiddle.const_defined?(:MemoryView)
    omit("Fiddle::MemoryView.export is needed") unless Fiddle::MemoryView.respond_to?(:export)
  end

  def little_endian?
    [1].pack("s") == [1].pack("s<")
  end

  # pack/MemoryView directive for bit strings: "b" on little-endian hosts,
  # "B" otherwise.
  def bit_template
    little_endian? ? "b" : "B"
  end

  # Exports +exportable+ and asserts the view's format, item size, byte
  # size and raw bytes in a single comparison.
  def assert_memory_view(format, item_size, byte_size, bytes, exportable)
    Fiddle::MemoryView.export(exportable) do |memory_view|
      assert_equal([
                     format,
                     item_size,
                     byte_size,
                     bytes,
                   ],
                   [
                     memory_view.format,
                     memory_view.item_size,
                     memory_view.byte_size,
                     memory_view.to_s,
                   ])
    end
  end

  # Assertion for arrays whose bytes equal +values+ packed with the same
  # directive that the view reports as its format.
  def assert_packed_memory_view(format, item_size, values, exportable)
    assert_memory_view(format,
                       item_size,
                       item_size * values.size,
                       values.pack("#{format}*"),
                       exportable)
  end

  test("BooleanArray") do
    template = bit_template
    # Nine true bits followed by seven zero bits: two bytes in total.
    bits = ("1" * 9) + ("0" * 7)
    assert_memory_view("#{template}8",
                       1,
                       2,
                       [bits].pack("#{template}*"),
                       Arrow::BooleanArray.new([true] * 9))
  end

  test("Int8Array") do
    values = [-(2 ** 7), 0, (2 ** 7) - 1]
    assert_packed_memory_view("c", 1, values, Arrow::Int8Array.new(values))
  end

  test("Int16Array") do
    values = [-(2 ** 15), 0, (2 ** 15) - 1]
    assert_packed_memory_view("s", 2, values, Arrow::Int16Array.new(values))
  end

  test("Int32Array") do
    values = [-(2 ** 31), 0, (2 ** 31) - 1]
    assert_packed_memory_view("l", 4, values, Arrow::Int32Array.new(values))
  end

  test("Int64Array") do
    values = [-(2 ** 63), 0, (2 ** 63) - 1]
    assert_packed_memory_view("q", 8, values, Arrow::Int64Array.new(values))
  end

  test("UInt8Array") do
    values = [0, (2 ** 8) - 1]
    assert_packed_memory_view("C", 1, values, Arrow::UInt8Array.new(values))
  end

  test("UInt16Array") do
    values = [0, (2 ** 16) - 1]
    assert_packed_memory_view("S", 2, values, Arrow::UInt16Array.new(values))
  end

  test("UInt32Array") do
    values = [0, (2 ** 32) - 1]
    assert_packed_memory_view("L", 4, values, Arrow::UInt32Array.new(values))
  end

  test("UInt64Array") do
    values = [(2 ** 64) - 1]
    assert_packed_memory_view("Q", 8, values, Arrow::UInt64Array.new(values))
  end

  test("FloatArray") do
    values = [-1.1, 0.0, 1.1]
    assert_packed_memory_view("f", 4, values, Arrow::FloatArray.new(values))
  end

  test("DoubleArray") do
    values = [-1.1, 0.0, 1.1]
    assert_packed_memory_view("d", 8, values, Arrow::DoubleArray.new(values))
  end

  test("FixedSizeBinaryArray") do
    values = ["\x01\x02", "\x03\x04", "\x05\x06"]
    two_bytes = Arrow::FixedSizeBinaryDataType.new(2)
    assert_memory_view("C2",
                       2,
                       2 * values.size,
                       values.join("").b,
                       Arrow::FixedSizeBinaryArray.new(two_bytes, values))
  end

  test("Date32Array") do
    n_days_since_epoch = 17406 # 2017-08-28
    values = [n_days_since_epoch]
    assert_packed_memory_view("l", 4, values, Arrow::Date32Array.new(values))
  end

  test("Date64Array") do
    n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
    values = [n_msecs_since_epoch]
    assert_packed_memory_view("q", 8, values, Arrow::Date64Array.new(values))
  end

  test("Time32Array") do
    values = [1, 2, 3]
    assert_packed_memory_view("l",
                              4,
                              values,
                              Arrow::Time32Array.new(:milli, values))
  end

  test("Time64Array") do
    values = [1, 2, 3]
    assert_packed_memory_view("q",
                              8,
                              values,
                              Arrow::Time64Array.new(:nano, values))
  end

  test("TimestampArray") do
    values = [1, 2, 3]
    assert_packed_memory_view("q",
                              8,
                              values,
                              Arrow::TimestampArray.new(:micro, values))
  end

  test("Decimal128Array") do
    values = [
      Arrow::Decimal128.new("10.1"),
      Arrow::Decimal128.new("11.1"),
      Arrow::Decimal128.new("10.2"),
    ]
    decimal_type = Arrow::Decimal128DataType.new(3, 1)
    assert_memory_view("q2",
                       16,
                       16 * values.size,
                       values.collect {|value| value.to_bytes.to_s}.join(""),
                       Arrow::Decimal128Array.new(decimal_type, values))
  end

  test("Decimal256Array") do
    values = [
      Arrow::Decimal256.new("10.1"),
      Arrow::Decimal256.new("11.1"),
      Arrow::Decimal256.new("10.2"),
    ]
    decimal_type = Arrow::Decimal256DataType.new(3, 1)
    assert_memory_view("q4",
                       32,
                       32 * values.size,
                       values.collect {|value| value.to_bytes.to_s}.join(""),
                       Arrow::Decimal256Array.new(decimal_type, values))
  end

  test("Buffer") do
    # Exports the null bitmap of a nine-element array in which only every
    # third element is non-nil.
    values = [0, nil, nil] * 3
    null_bitmap = Arrow::Int8Array.new(values).null_bitmap
    template = bit_template
    assert_memory_view("#{template}8",
                       1,
                       2,
                       ["100" * 3].pack("#{template}*"),
                       null_bitmap)
  end
end
# Checks element access on Arrow::NullArray.
class NullArrayTest < Test::Unit::TestCase
  test("#[]") do
    # A one-element null array is expected to yield nil for its only slot.
    null_array = Arrow::NullArray.new(1)
    first_value = null_array[0]
    assert_nil(first_value)
  end
end
# Exercises Arrow::Table.load against the "TestOrcFile.test1.orc" fixture.
#
# NOTE(review): the indentation inside the expected dump literals below
# could not be verified from here -- confirm it against the real #to_s
# output of the Arrow arrays before relying on it.
class ORCTest < Test::Unit::TestCase
  include Helper::Fixture

  def setup
    # Every test in this case is omitted when the ORC reader is not defined.
    unless Arrow.const_defined?(:ORCFileReader)
      omit("Require Apache Arrow ORC")
    end
    @orc_path = fixture_path("TestOrcFile.test1.orc")
  end

  # Renders +values+ as a bracketed list, one inspected value per line.
  def pp_values(values)
    "[\n #{values.map(&:inspect).join(",\n ")}\n]"
  end

  # Returns one [field description, [chunk dump, ...]] pair per column.
  def dump_columns(table)
    table.columns.map do |column|
      [column.field.to_s, column.data.chunks.map(&:to_s)]
    end
  end

  sub_test_case("load") do
    test("default") do
      middle_dump = <<-STRUCT.chomp
-- is_valid: all not null
-- child 0 type: list<item: struct<int1: int32, string1: string>>
 [
 -- is_valid: all not null
 -- child 0 type: int32
 [
 1,
 2
 ]
 -- child 1 type: string
 [
 "bye",
 "sigh"
 ],
 -- is_valid: all not null
 -- child 0 type: int32
 [
 1,
 2
 ]
 -- child 1 type: string
 [
 "bye",
 "sigh"
 ]
 ]
      STRUCT
      list_dump = <<-LIST.chomp
[
 -- is_valid: all not null
 -- child 0 type: int32
 [
 3,
 4
 ]
 -- child 1 type: string
 [
 "good",
 "bad"
 ],
 -- is_valid: all not null
 -- child 0 type: int32
 [
 100000000,
 -100000,
 1234
 ]
 -- child 1 type: string
 [
 "cat",
 "in",
 "hat"
 ]
]
      LIST
      map_dump = <<-MAP.chomp
[
 keys:
 []
 values:
 -- is_valid: all not null
 -- child 0 type: int32
 []
 -- child 1 type: string
 [],
 keys:
 [
 "chani",
 "mauddib"
 ]
 values:
 -- is_valid: all not null
 -- child 0 type: int32
 [
 5,
 1
 ]
 -- child 1 type: string
 [
 "chani",
 "mauddib"
 ]
]
      MAP
      max_int64 = 9223372036854775807
      expected = [
        ["boolean1: bool", [pp_values([false, true])]],
        ["byte1: int8", [pp_values([1, 100])]],
        ["short1: int16", [pp_values([1024, 2048])]],
        ["int1: int32", [pp_values([65536, 65536])]],
        ["long1: int64", [pp_values([max_int64, max_int64])]],
        ["float1: float", [pp_values([1, 2])]],
        ["double1: double", [pp_values([-15, -5])]],
        ["bytes1: binary", ["[\n 0001020304,\n \n]"]],
        ["string1: string", [pp_values(["hi", "bye"])]],
        [
          "middle: struct<list: " +
            "list<item: struct<int1: int32, string1: string>>>",
          [middle_dump],
        ],
        [
          "list: list<item: struct<int1: int32, string1: string>>",
          [list_dump],
        ],
        [
          "map: map<string, struct<int1: int32, string1: string>>",
          [map_dump],
        ],
      ]

      loaded = Arrow::Table.load(@orc_path)
      assert_equal(expected, dump_columns(loaded))
    end

    test(":field_indexes") do
      # Only the fields selected by index are expected in the loaded table.
      loaded = Arrow::Table.load(@orc_path, field_indexes: [1, 3])
      expected = [
        ["boolean1: bool", [pp_values([false, true])]],
        ["short1: int16", [pp_values([1024, 2048])]],
      ]
      assert_equal(expected, dump_columns(loaded))
    end
  end
end
# Covers construction of Arrow::RecordBatchBuilder and its append/flush API.
class RecordBatchBuilderTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Schema") do
      expected_schema = Arrow::Schema.new(visible: :boolean,
                                          count: :uint32)
      record_batch_builder = Arrow::RecordBatchBuilder.new(expected_schema)
      assert_equal(expected_schema, record_batch_builder.schema)
    end

    test("Hash") do
      # A field hash given directly must produce the equivalent schema.
      record_batch_builder =
        Arrow::RecordBatchBuilder.new(visible: :boolean,
                                      count: :uint32)
      expected_schema = Arrow::Schema.new(visible: :boolean,
                                          count: :uint32)
      assert_equal(expected_schema, record_batch_builder.schema)
    end
  end

  sub_test_case("instance methods") do
    def setup
      @schema = Arrow::Schema.new(visible: :boolean,
                                  count: :uint32)
      @builder = Arrow::RecordBatchBuilder.new(@schema)
    end

    # Builds the record batch the builder is expected to flush for +arrays+.
    def expected_batch(arrays)
      Arrow::RecordBatch.new(@schema, arrays[0].length, arrays)
    end

    sub_test_case("#[]") do
      test("String") do
        column_builder = @builder["visible"]
        assert_equal(Arrow::BooleanDataType.new,
                     column_builder.value_data_type)
      end

      test("Symbol") do
        column_builder = @builder[:visible]
        assert_equal(Arrow::BooleanDataType.new,
                     column_builder.value_data_type)
      end

      test("Integer") do
        column_builder = @builder[1]
        assert_equal(Arrow::UInt32DataType.new,
                     column_builder.value_data_type)
      end
    end

    test("#append") do
      # Row-wise and column-wise inputs are passed in a single call.
      row_input = [
        {visible: true, count: 1},
      ]
      column_input = {
        visible: [false],
        count: [2],
      }
      expected_arrays = [
        Arrow::BooleanArray.new([true, false]),
        Arrow::UInt32Array.new([1, 2]),
      ]
      @builder.append(row_input, column_input)
      assert_equal(expected_batch(expected_arrays), @builder.flush)
    end

    test("#append_records") do
      # Mixes hashes (with an extra and a missing key), arrays (full and
      # short) and nil as records.
      row_input = [
        {visible: true, count: 1},
        {visible: true, count: 2, garbage: "garbage"},
        {visible: true},
        [false, 4],
        nil,
        [true],
      ]
      expected_arrays = [
        Arrow::BooleanArray.new([true, true, true, false, nil, true]),
        Arrow::UInt32Array.new([1, 2, nil, 4, nil, nil]),
      ]
      @builder.append_records(row_input)
      assert_equal(expected_batch(expected_arrays), @builder.flush)
    end

    test("#append_columns") do
      column_input = {
        visible: [true, true, true, false, nil, true],
        count: [1, 2, nil, 4, nil, nil],
      }
      expected_arrays = [
        Arrow::BooleanArray.new(column_input[:visible]),
        Arrow::UInt32Array.new(column_input[:count]),
      ]
      @builder.append_columns(column_input)
      assert_equal(expected_batch(expected_arrays), @builder.flush)
    end

    test("#column_builders") do
      by_index = (0..1).map do |index|
        @builder.get_column_builder(index)
      end
      assert_equal(by_index, @builder.column_builders)
    end
  end
end
# Round-trips a record batch through Arrow::RecordBatchFileWriter and
# Arrow::RecordBatchFileReader using a temporary file.
class RecordBatchFileReaderTest < Test::Unit::TestCase
  # Names of the numeric columns; each name doubles as its data type.
  COLUMN_NAMES = %w[
    uint8 uint16 uint32 uint64
    int8 int16 int32 int64
    float double
  ].freeze

  # Builds the expected record: the four unsigned columns hold +unsigned+,
  # the four signed columns hold +signed+, followed by the float and double
  # values.
  def expected_row(unsigned, signed, float, double)
    {
      "uint8" => unsigned,
      "uint16" => unsigned,
      "uint32" => unsigned,
      "uint64" => unsigned,
      "int8" => signed,
      "int16" => signed,
      "int32" => signed,
      "int64" => signed,
      "float" => float,
      "double" => double,
    }
  end

  # Writes a single four-row record batch for +schema+ to +path+.
  def write_batch(path, schema)
    Arrow::FileOutputStream.open(path, false) do |output|
      Arrow::RecordBatchFileWriter.open(output, schema) do |writer|
        uints = [1, 2, 4, 8]
        ints = [1, -2, 4, -8]
        floats = [1.1, -2.2, 4.4, -8.8]
        columns = [
          Arrow::UInt8Array.new(uints),
          Arrow::UInt16Array.new(uints),
          Arrow::UInt32Array.new(uints),
          Arrow::UInt64Array.new(uints),
          Arrow::Int8Array.new(ints),
          Arrow::Int16Array.new(ints),
          Arrow::Int32Array.new(ints),
          Arrow::Int64Array.new(ints),
          Arrow::FloatArray.new(floats),
          Arrow::DoubleArray.new(floats),
        ]

        record_batch = Arrow::RecordBatch.new(schema, 4, columns)
        writer.write_record_batch(record_batch)
      end
    end
  end

  test("write/read") do
    fields = COLUMN_NAMES.map do |name|
      Arrow::Field.new(name, name.to_sym)
    end
    schema = Arrow::Schema.new(fields)

    # The "float" column expectations are the listed values after a
    # round-trip through a 32-bit float column.
    expected = [
      expected_row(1, 1, 1.100000023841858, 1.1),
      expected_row(2, -2, -2.200000047683716, -2.2),
      expected_row(4, 4, 4.400000095367432, 4.4),
      expected_row(8, -8, -8.800000190734863, -8.8),
    ]

    tempfile = Tempfile.new(["batch", ".arrow"])
    begin
      write_batch(tempfile.path, schema)

      Arrow::MemoryMappedInputStream.open(tempfile.path) do |input|
        reader = Arrow::RecordBatchFileReader.new(input)
        reader.each do |record_batch|
          assert_equal(expected, record_batch.collect(&:to_h))
        end
      end
    ensure
      # Close and unlink the temporary file deterministically instead of
      # leaving it to the garbage collector, even when an assertion fails.
      tempfile.close!
    end
  end
end
# Checks that Arrow::RecordBatchIterator yields the batches it was built from.
class RecordBatchIteratorTest < Test::Unit::TestCase
  def setup
    @schema = Arrow::Schema.new(visible: :boolean,
                                count: :uint32)
    single_row_batch = Arrow::RecordBatch.new(@schema,
                                              visible: [true],
                                              count: [1])
    two_row_batch = Arrow::RecordBatch.new(@schema,
                                           visible: [false, nil],
                                           count: [nil, 3])
    @record_batches = [single_row_batch, two_row_batch]
    @iterator = Arrow::RecordBatchIterator.new(@record_batches)
  end

  def test_to_a
    # Draining the iterator must give back the original batches in order.
    drained = @iterator.to_a
    assert_equal(@record_batches, drained)
  end
end
# Covers Arrow::RecordBatchReader.try_convert for the supported source types.
class TestRecordBatchReader < Test::Unit::TestCase
  sub_test_case(".try_convert") do
    # Column data shared by every conversion source in this case.
    def raw_columns
      {
        "count" => [1, 2, 3],
        "private" => [true, false, true],
      }
    end

    test("Arrow::RecordBatch") do
      source = Arrow::RecordBatch.new(raw_columns)
      converted = Arrow::RecordBatchReader.try_convert(source)
      assert_equal(source, converted.read_next)
    end

    test("[Arrow::RecordBatch]") do
      # Same as above, but the batch is wrapped in an Array.
      source = Arrow::RecordBatch.new(raw_columns)
      converted = Arrow::RecordBatchReader.try_convert([source])
      assert_equal(source, converted.read_next)
    end

    test("Arrow::Table") do
      source = Arrow::Table.new(raw_columns)
      converted = Arrow::RecordBatchReader.try_convert(source)
      assert_equal(source, converted.read_all)
    end
  end
end
# Covers Arrow::RecordBatch construction, iteration, conversion, equality
# and column/sub-batch access.
class RecordBatchTest < Test::Unit::TestCase
  # Converts every record of +record_batch+ to a Hash for comparison.
  def rows_of(record_batch)
    record_batch.each_record.collect(&:to_h)
  end

  sub_test_case(".new") do
    def setup
      @schema = Arrow::Schema.new(visible: :boolean,
                                  count: :uint32)
    end

    test("[raw_table]") do
      raw_table = {
        visible: [true, nil, false],
        count: [1, nil, 3],
      }
      expected_rows = [
        {"visible" => true, "count" => 1},
        {"visible" => nil, "count" => nil},
        {"visible" => false, "count" => 3},
      ]
      assert_equal(expected_rows,
                   rows_of(Arrow::RecordBatch.new(raw_table)))
    end

    test("[Schema, records]") do
      # Records may be a Hash, nil, or a positional Array.
      records = [
        {visible: true, count: 1},
        nil,
        [false, 3],
      ]
      expected_rows = [
        {"visible" => true, "count" => 1},
        {"visible" => nil, "count" => nil},
        {"visible" => false, "count" => 3},
      ]
      assert_equal(expected_rows,
                   rows_of(Arrow::RecordBatch.new(@schema, records)))
    end

    test("[Schema, columns]") do
      columns = {
        visible: [true, nil, false],
        count: [1, 2, nil],
      }
      expected_rows = [
        {"visible" => true, "count" => 1},
        {"visible" => nil, "count" => 2},
        {"visible" => false, "count" => nil},
      ]
      assert_equal(expected_rows,
                   rows_of(Arrow::RecordBatch.new(@schema, columns)))
    end

    test("[Schema, n_rows, columns]") do
      columns = [
        Arrow::BooleanArray.new([true, nil, false]),
        Arrow::UInt32Array.new([1, 2, nil]),
      ]
      n_rows = columns[0].length
      expected_rows = [
        {"visible" => true, "count" => 1},
        {"visible" => nil, "count" => 2},
        {"visible" => false, "count" => nil},
      ]
      assert_equal(expected_rows,
                   rows_of(Arrow::RecordBatch.new(@schema, n_rows, columns)))
    end
  end

  sub_test_case("instance methods") do
    def setup
      @schema = Arrow::Schema.new(count: :uint32)
      @counts = Arrow::UInt32Array.new([1, 2, 4, 8])
      @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts])
    end

    sub_test_case("#each") do
      test("default") do
        # Keep each yielded record together with the index it reported at
        # yield time; the record's index is read again after iteration.
        seen = []
        @record_batch.each do |record|
          seen << [record, record.index]
        end
        index_pairs = seen.map do |record, index_at_yield|
          [record.index, index_at_yield]
        end
        assert_equal([[0, 0], [1, 1], [2, 2], [3, 3]], index_pairs)
      end

      test("reuse_record: true") do
        # With reuse_record every stored record reports index 3 after the
        # loop, while the index captured at yield time still advances.
        seen = []
        @record_batch.each(reuse_record: true) do |record|
          seen << [record, record.index]
        end
        index_pairs = seen.map do |record, index_at_yield|
          [record.index, index_at_yield]
        end
        assert_equal([[3, 0], [3, 1], [3, 2], [3, 3]], index_pairs)
      end
    end

    test("#to_table") do
      expected_table = Arrow::Table.new(@schema, [@counts])
      assert_equal(expected_table, @record_batch.to_table)
    end

    sub_test_case("#==") do
      test("Arrow::RecordBatch") do
        assert do
          @record_batch == @record_batch
        end
      end

      test("not Arrow::RecordBatch") do
        assert do
          not (@record_batch == 29)
        end
      end
    end

    sub_test_case("#[]") do
      def setup
        # Seven single-row boolean columns named a..g.
        @record_batch = Arrow::RecordBatch.new(a: [true],
                                               b: [true],
                                               c: [true],
                                               d: [true],
                                               e: [true],
                                               f: [true],
                                               g: [true])
      end

      test("[String]") do
        expected_column = Arrow::Column.new(@record_batch, 0)
        assert_equal(expected_column, @record_batch["a"])
      end

      test("[Symbol]") do
        expected_column = Arrow::Column.new(@record_batch, 1)
        assert_equal(expected_column, @record_batch[:b])
      end

      test("[Integer]") do
        # A negative index counts from the last column.
        expected_column = Arrow::Column.new(@record_batch, 6)
        assert_equal(expected_column, @record_batch[-1])
      end

      test("[Range]") do
        expected_batch = Arrow::RecordBatch.new(d: [true],
                                                e: [true])
        assert_equal(expected_batch, @record_batch[3..4])
      end

      test("[[Symbol, String, Integer, Range]]") do
        expected_batch = Arrow::RecordBatch.new(c: [true],
                                                a: [true],
                                                g: [true],
                                                d: [true],
                                                e: [true])
        assert_equal(expected_batch, @record_batch[[:c, "a", -1, 3..4]])
      end
    end
  end
end
# Checks the rolling-window #lag computation over an Int32 column.
#
# NOTE(review): the indentation inside the expected array dump could not be
# verified from here -- confirm it against the real Arrow::Array#to_s output.
class RollingWindowTest < Test::Unit::TestCase
  include Helper::Fixture

  def setup
    numbers = Arrow::Int32Array.new([1, -2, nil, 4, 6, 3])
    raw_table = {number: numbers}
    @table = Arrow::Table.new(raw_table)
  end

  test("#lag") do
    # Each element is the difference to the previous one; it is null where
    # either neighbour is missing.
    expected = <<-ARRAY.chomp
[
 null,
 -3,
 null,
 null,
 2,
 -3
]
    ARRAY
    lagged = @table.window.lag(:number)
    assert_equal(expected, lagged.to_s)
  end
end
# Covers Arrow::Schema construction from several field descriptions, field
# lookup, equality and string rendering.
class SchemaTest < Test::Unit::TestCase
  include Helper::Omittable

  def setup
    @count_field = Arrow::Field.new("count", :uint32)
    @visible_field = Arrow::Field.new("visible", :boolean)
  end

  sub_test_case(".new") do
    test("[Arrow::Field]") do
      fields = [@count_field, @visible_field]
      rendered = Arrow::Schema.new(fields).to_s
      assert_equal("count: uint32\nvisible: bool", rendered)
    end

    test("[Arrow::Field, Hash]") do
      # A field may also be described by a name/type Hash.
      fields = [
        @count_field,
        {name: "visible", type: :boolean},
      ]
      rendered = Arrow::Schema.new(fields).to_s
      assert_equal("count: uint32\nvisible: bool", rendered)
    end

    test("{String, Symbol => Arrow::DataType}") do
      fields = {
        "count" => Arrow::UInt32DataType.new,
        :visible => :boolean,
      }
      rendered = Arrow::Schema.new(fields).to_s
      assert_equal("count: uint32\nvisible: bool", rendered)
    end

    test("{String, Symbol => Hash}") do
      tag_field = {name: "tag", type: :string}
      fields = {
        "count" => {type: :uint32},
        :tags => {type: :list, field: tag_field},
      }
      rendered = Arrow::Schema.new(fields).to_s
      assert_equal("count: uint32\ntags: list<tag: string>", rendered)
    end
  end

  sub_test_case("instance methods") do
    def setup
      super
      @schema = Arrow::Schema.new([@count_field, @visible_field])
    end

    sub_test_case("#[]") do
      test("[String]") do
        looked_up = ["count", "visible"].map { |name| @schema[name] }
        assert_equal([@count_field, @visible_field], looked_up)
      end

      test("[Symbol]") do
        looked_up = [:count, :visible].map { |name| @schema[name] }
        assert_equal([@count_field, @visible_field], looked_up)
      end

      test("[Integer]") do
        looked_up = [0, 1].map { |index| @schema[index] }
        assert_equal([@count_field, @visible_field], looked_up)
      end

      test("[invalid]") do
        # An Array is none of the accepted key types.
        invalid = []
        message =
          "field name or index must be String, Symbol or Integer" +
          ": <#{invalid.inspect}>"
        assert_raise(ArgumentError.new(message)) do
          @schema[invalid]
        end
      end
    end

    sub_test_case("#==") do
      test("Arrow::Schema") do
        assert do
          @schema == @schema
        end
      end

      test("not Arrow::Schema") do
        assert do
          not (@schema == 29)
        end
      end
    end

    sub_test_case("#to_s") do
      test("show_metadata") do
        require_gi_bindings(3, 4, 2)

        expected = <<-SCHEMA.chomp
count: uint32
visible: bool
-- metadata --
key: value
        SCHEMA
        with_metadata = @schema.with_metadata("key" => "value")
        assert_equal(expected, with_metadata.to_s(show_metadata: true))
      end
    end
  end
end
# Covers the block form of Arrow::Table#slice: each test builds a condition
# from the yielded slicer and compares the sliced table's text rendering.
#
# NOTE(review): the column spacing inside the expected tables could not be
# verified from here -- confirm it against the real Arrow::Table#to_s output.
class SlicerTest < Test::Unit::TestCase
  def setup
    @count_field = Arrow::Field.new("count", :uint32)
    @visible_field = Arrow::Field.new("visible", :boolean)
    schema = Arrow::Schema.new([@count_field, @visible_field])
    # The two columns are chunked differently on purpose: both hold ten
    # values but split them at different positions.
    count_chunks = [
      [0, 1, 2],
      [4, 8, 16],
      [32, 64, nil],
      [256],
    ].map { |values| Arrow::UInt32Array.new(values) }
    visible_chunks = [
      [nil, true, false, nil],
      [true],
      [true, false],
      [nil],
      [nil],
      [true],
    ].map { |values| Arrow::BooleanArray.new(values) }
    @count_array = Arrow::ChunkedArray.new(count_chunks)
    @visible_array = Arrow::ChunkedArray.new(visible_chunks)
    @table = Arrow::Table.new(schema, [@count_array, @visible_array])
  end

  # Slices @table with +condition+ (which receives the slicer) and compares
  # the rendered result with +expected+.
  def assert_sliced(expected, &condition)
    sliced_table = @table.slice(&condition)
    assert_equal(expected, sliced_table.to_s)
  end

  sub_test_case("column") do
    test("BooleanArray") do
      assert_sliced(<<-TABLE) { |slicer| slicer.visible }
 count visible
0 (null) (null)
1 1 true
2 (null) (null)
3 8 true
4 16 true
5 (null) (null)
6 (null) (null)
7 256 true
      TABLE
    end

    test("not BooleanArray") do
      assert_sliced(<<-TABLE) { |slicer| slicer.count }
 count visible
0 1 true
1 2 false
2 4 (null)
3 8 true
4 16 true
5 32 false
6 64 (null)
7 (null) (null)
8 256 true
      TABLE
    end
  end

  sub_test_case("!column") do
    test("BooleanArray") do
      assert_sliced(<<-TABLE) { |slicer| !slicer.visible }
 count visible
0 (null) (null)
1 2 false
2 (null) (null)
3 32 false
4 (null) (null)
5 (null) (null)
      TABLE
    end

    test("not BooleanArray") do
      assert_sliced(<<-TABLE) { |slicer| !slicer.count }
 count visible
0 0 (null)
1 (null) (null)
      TABLE
    end
  end

  test("column.null?") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible.null? }
 count visible
0 0 (null)
1 4 (null)
2 64 (null)
3 (null) (null)
    TABLE
  end

  test("column.valid?") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible.valid? }
 count visible
0 1 true
1 2 false
2 8 true
3 16 true
4 32 false
5 256 true
    TABLE
  end

  sub_test_case("column ==") do
    test("nil") do
      assert_sliced(<<-TABLE) { |slicer| slicer.visible == nil }
 count visible
0 0 (null)
1 4 (null)
2 64 (null)
3 (null) (null)
      TABLE
    end

    test("value") do
      assert_sliced(<<-TABLE) { |slicer| slicer.visible == true }
 count visible
0 (null) (null)
1 1 true
2 (null) (null)
3 8 true
4 16 true
5 (null) (null)
6 (null) (null)
7 256 true
      TABLE
    end
  end

  sub_test_case("!(column ==)") do
    test("nil") do
      assert_sliced(<<-TABLE) { |slicer| !(slicer.visible == nil) }
 count visible
0 1 true
1 2 false
2 8 true
3 16 true
4 32 false
5 256 true
      TABLE
    end

    test("value") do
      assert_sliced(<<-TABLE) { |slicer| !(slicer.visible == true) }
 count visible
0 (null) (null)
1 2 false
2 (null) (null)
3 32 false
4 (null) (null)
5 (null) (null)
      TABLE
    end
  end

  sub_test_case("column !=") do
    test("nil") do
      assert_sliced(<<-TABLE) { |slicer| slicer.visible != nil }
 count visible
0 1 true
1 2 false
2 8 true
3 16 true
4 32 false
5 256 true
      TABLE
    end

    test("value") do
      assert_sliced(<<-TABLE) { |slicer| slicer.visible != true }
 count visible
0 (null) (null)
1 2 false
2 (null) (null)
3 32 false
4 (null) (null)
5 (null) (null)
      TABLE
    end
  end

  test("column < value") do
    assert_sliced(<<-TABLE) { |slicer| slicer.count < 16 }
 count visible
0 0 (null)
1 1 true
2 2 false
3 4 (null)
4 8 true
5 (null) (null)
    TABLE
  end

  test("!(column < value)") do
    assert_sliced(<<-TABLE) { |slicer| !(slicer.count < 16) }
 count visible
0 16 true
1 32 false
2 64 (null)
3 (null) (null)
4 256 true
    TABLE
  end

  test("column <= value") do
    assert_sliced(<<-TABLE) { |slicer| slicer.count <= 16 }
 count visible
0 0 (null)
1 1 true
2 2 false
3 4 (null)
4 8 true
5 16 true
6 (null) (null)
    TABLE
  end

  test("!(column <= value)") do
    assert_sliced(<<-TABLE) { |slicer| !(slicer.count <= 16) }
 count visible
0 32 false
1 64 (null)
2 (null) (null)
3 256 true
    TABLE
  end

  test("column > value") do
    assert_sliced(<<-TABLE) { |slicer| slicer.count > 16 }
 count visible
0 32 false
1 64 (null)
2 (null) (null)
3 256 true
    TABLE
  end

  test("!(column > value)") do
    assert_sliced(<<-TABLE) { |slicer| !(slicer.count > 16) }
 count visible
0 0 (null)
1 1 true
2 2 false
3 4 (null)
4 8 true
5 16 true
6 (null) (null)
    TABLE
  end

  test("column >= value") do
    assert_sliced(<<-TABLE) { |slicer| slicer.count >= 16 }
 count visible
0 16 true
1 32 false
2 64 (null)
3 (null) (null)
4 256 true
    TABLE
  end

  test("!(column >= value)") do
    assert_sliced(<<-TABLE) { |slicer| !(slicer.count >= 16) }
 count visible
0 0 (null)
1 1 true
2 2 false
3 4 (null)
4 8 true
5 (null) (null)
    TABLE
  end

  test("column.in") do
    assert_sliced(<<-TABLE) { |slicer| slicer.count.in?([1, 4, 16, 64]) }
 count visible
0 1 true
1 4 (null)
2 16 true
3 64 (null)
    TABLE
  end

  test("!column.in") do
    assert_sliced(<<-TABLE) { |slicer| !slicer.count.in?([1, 4, 16, 64]) }
 count visible
0 0 (null)
1 2 false
2 8 true
3 32 false
4 (null) (null)
5 256 true
    TABLE
  end

  test("condition & condition") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible & (slicer.count >= 16) }
 count visible
0 (null) (null)
1 (null) (null)
2 16 true
3 (null) (null)
4 (null) (null)
5 256 true
    TABLE
  end

  test("condition | condition") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible | (slicer.count >= 16) }
 count visible
0 (null) (null)
1 1 true
2 (null) (null)
3 8 true
4 16 true
5 32 false
6 (null) (null)
7 (null) (null)
8 256 true
    TABLE
  end

  test("condition ^ condition") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible ^ (slicer.count >= 16) }
 count visible
0 (null) (null)
1 1 true
2 (null) (null)
3 8 true
4 32 false
5 (null) (null)
6 (null) (null)
    TABLE
  end

  test("select") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible.select { |value| value.nil? || value } }
 count visible
0 0 (null)
1 1 true
2 4 (null)
3 8 true
4 16 true
5 64 (null)
6 (null) (null)
7 256 true
    TABLE
  end

  test("!select") do
    assert_sliced(<<-TABLE) { |slicer| !slicer.visible.select { |value| value.nil? || value } }
 count visible
0 2 false
1 32 false
    TABLE
  end

  test("reject") do
    assert_sliced(<<-TABLE) { |slicer| slicer.visible.reject { |value| value.nil? || value } }
 count visible
0 2 false
1 32 false
    TABLE
  end

  test("!reject") do
    assert_sliced(<<-TABLE) { |slicer| !slicer.visible.reject { |value| value.nil? || value } }
 count visible
0 0 (null)
1 1 true
2 4 (null)
3 8 true
4 16 true
5 64 (null)
6 (null) (null)
7 256 true
    TABLE
  end
end
# Exercises Arrow::Table#sort_indices with the supported sort key notations.
class SortIndicesTest < Test::Unit::TestCase
  # Builds a two-column integer table. number1 contains -4 twice, which gives
  # the multi-key test below a tie for its second key to break.
  def setup
    columns = {
      number1: [16, -1, 2, 32, -4, -4, -8],
      number2: [32, 2, -16, 8, 1, 4, 1],
    }
    @table = Arrow::Table.new(**columns)
  end

  sub_test_case("Table") do
    # Bare Symbol key: the expected indices walk number1 from -8 up to 32.
    test("Symbol") do
      expected = Arrow::UInt64Array.new([6, 4, 5, 1, 2, 0, 3])
      actual = @table.sort_indices(:number1)
      assert_equal(expected, actual)
    end

    # "-" prefixed String key: the expected indices walk number1 from 32
    # down to -8.
    test("-String") do
      expected = Arrow::UInt64Array.new([3, 0, 2, 1, 4, 5, 6])
      actual = @table.sort_indices("-number1")
      assert_equal(expected, actual)
    end

    # Two keys: the rows that tie on number1 (indices 4 and 5) are expected
    # in the order 5, 4, i.e. with the larger number2 value first.
    test("Symbol, -String") do
      expected = Arrow::UInt64Array.new([6, 5, 4, 1, 2, 0, 3])
      actual = @table.sort_indices([:number1, "-number2"])
      assert_equal(expected, actual)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-sort-key.rb b/src/arrow/ruby/red-arrow/test/test-sort-key.rb
# new file mode 100644
# index 000000000..0a31f8461
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-sort-key.rb
# @@ -0,0 +1,81 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Pins down how Arrow::SortKey interprets its target and order arguments and
# how the result is rendered by #to_s.
class SortKeyTest < Test::Unit::TestCase
  sub_test_case(".resolve") do
    # An existing SortKey resolves to an equal key.
    test("SortKey") do
      expected = Arrow::SortKey.new("-count")
      source = Arrow::SortKey.new("-count")
      assert_equal(expected, Arrow::SortKey.resolve(source))
    end

    # A "-" prefixed String resolves to the key SortKey.new builds from it.
    test("-String") do
      expected = Arrow::SortKey.new("-count")
      assert_equal(expected, Arrow::SortKey.resolve("-count"))
    end

    # A (target, order) pair of Symbols equals the "-count" shorthand.
    test("Symbol, Symbol") do
      expected = Arrow::SortKey.new("-count")
      assert_equal(expected, Arrow::SortKey.resolve(:count, :desc))
    end
  end

  sub_test_case("#initialize") do
    # No prefix: to_s reports the key with a leading "+".
    test("String") do
      key = Arrow::SortKey.new("count")
      assert_equal("+count", key.to_s)
    end

    test("+String") do
      key = Arrow::SortKey.new("+count")
      assert_equal("+count", key.to_s)
    end

    test("-String") do
      key = Arrow::SortKey.new("-count")
      assert_equal("-count", key.to_s)
    end

    # With a Symbol the leading "-" is not consumed as an order marker:
    # to_s reports "+-count".
    test("Symbol") do
      key = Arrow::SortKey.new(:"-count")
      assert_equal("+-count", key.to_s)
    end

    # With an explicit order argument the "-" stays in the reported name and
    # the order contributes a second "-". The three tests below pass the
    # order as a Symbol, a String and a SortOrder constant respectively.
    test("String, Symbol") do
      key = Arrow::SortKey.new("-count", :desc)
      assert_equal("--count", key.to_s)
    end

    test("String, String") do
      key = Arrow::SortKey.new("-count", "desc")
      assert_equal("--count", key.to_s)
    end

    test("String, SortOrder") do
      order = Arrow::SortOrder::DESCENDING
      key = Arrow::SortKey.new("-count", order)
      assert_equal("--count", key.to_s)
    end
  end

  sub_test_case("#to_s") do
    # Feeding a key's own to_s output back into .new must give an equal key.
    test("recreatable") do
      original = Arrow::SortKey.new("-count", :desc)
      recreated = Arrow::SortKey.new(original.to_s)
      assert_equal(original, recreated)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-sort-options.rb b/src/arrow/ruby/red-arrow/test/test-sort-options.rb
# new file mode 100644
# index 000000000..0afd65b0f
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-sort-options.rb
# @@ -0,0 +1,58 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks which sort keys an Arrow::SortOptions holds after construction and
# after #add_sort_key, comparing them through their #to_s form.
class SortOptionsTest < Test::Unit::TestCase
  sub_test_case("#initialize") do
    # Without arguments there are no sort keys.
    test("none") do
      options = Arrow::SortOptions.new
      assert_equal([], key_strings(options))
    end

    # Each positional argument becomes one key, in the order given.
    test("-String, Symbol") do
      options = Arrow::SortOptions.new("-count", :age)
      assert_equal(["-count", "+age"], key_strings(options))
    end
  end

  sub_test_case("instance methods") do
    setup do
      @options = Arrow::SortOptions.new
    end

    sub_test_case("#add_sort_key") do
      test("-String") do
        @options.add_sort_key("-count")
        assert_equal(["-count"], key_strings(@options))
      end

      # An explicit order next to a "-" prefixed name is reported as "--count".
      test("-String, Symbol") do
        @options.add_sort_key("-count", :desc)
        assert_equal(["--count"], key_strings(@options))
      end

      test("SortKey") do
        sort_key = Arrow::SortKey.new("-count")
        @options.add_sort_key(sort_key)
        assert_equal(["-count"], key_strings(@options))
      end
    end
  end

  private

  # Renders every sort key of +options+ with #to_s so the assertions can
  # compare plain Strings.
  def key_strings(options)
    options.sort_keys.map(&:to_s)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-sparse-union-data-type.rb b/src/arrow/ruby/red-arrow/test/test-sparse-union-data-type.rb
# new file mode 100644
# index 000000000..e672f82d4
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-sparse-union-data-type.rb
# @@ -0,0 +1,41 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the two calling conventions of Arrow::SparseUnionDataType.new
# (positional and keyword) through the resulting #to_s description.
class SparseUnionDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # One field is given as an Arrow::Field, the other as a plain Hash
    # description, so both input forms are exercised by every test.
    def setup
      visible_field = Arrow::Field.new("visible", :boolean)
      count_description = {
        name: "count",
        type: :int32,
      }
      @fields = [visible_field, count_description]
    end

    # Positional form: fields first, type codes second.
    test("ordered arguments") do
      data_type = Arrow::SparseUnionDataType.new(@fields, [2, 9])
      assert_equal("sparse_union<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end

    # Keyword form carrying the same information.
    test("description") do
      data_type = Arrow::SparseUnionDataType.new(fields: @fields,
                                                 type_codes: [2, 9])
      assert_equal("sparse_union<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-string-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/test/test-string-dictionary-array-builder.rb
# new file mode 100644
# index 000000000..d6df509ed
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-string-dictionary-array-builder.rb
# @@ -0,0 +1,103 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks the dictionary and the indices produced by
# Arrow::StringDictionaryArrayBuilder#append_values for various inputs.
class StringDictionaryArrayBuilderTest < Test::Unit::TestCase
  def setup
    @builder = Arrow::StringDictionaryArrayBuilder.new
  end

  sub_test_case("#append_values") do
    # nil adds nothing to the dictionary and appears as a nil index.
    test("[nil]") do
      @builder.append_values([nil])
      expected = [
        [],
        [nil],
      ]
      assert_equal(expected, dictionary_and_indices(@builder.finish))
    end

    test("[String]") do
      @builder.append_values(["hello"])
      expected = [
        ["hello"],
        [0],
      ]
      assert_equal(expected, dictionary_and_indices(@builder.finish))
    end

    # A Symbol ends up in the dictionary as a String.
    test("[Symbol]") do
      @builder.append_values([:hello])
      expected = [
        ["hello"],
        [0],
      ]
      assert_equal(expected, dictionary_and_indices(@builder.finish))
    end

    # :world and "world" share one dictionary entry (index 1).
    test("[nil, String, Symbol]") do
      values = [
        nil,
        "Hello",
        :world,
        "world",
      ]
      @builder.append_values(values)
      expected = [
        ["Hello", "world"],
        [nil, 0, 1, 1],
      ]
      assert_equal(expected, dictionary_and_indices(@builder.finish))
    end

    # The value flagged false in the second argument is absent from the
    # dictionary and appears as a nil index.
    test("is_valids") do
      values = [
        "Hello",
        :world,
        :goodbye,
      ]
      is_valids = [
        true,
        false,
        true,
      ]
      @builder.append_values(values, is_valids)
      expected = [
        ["Hello", "goodbye"],
        [0, nil, 1],
      ]
      assert_equal(expected, dictionary_and_indices(@builder.finish))
    end
  end

  private

  # Returns [dictionary, indices] of +array+ as plain Ruby Arrays.
  def dictionary_and_indices(array)
    [
      array.dictionary.to_a,
      array.indices.to_a,
    ]
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-struct-array-builder.rb b/src/arrow/ruby/red-arrow/test/test-struct-array-builder.rb
# new file mode 100644
# index 000000000..ab0aa5edf
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-struct-array-builder.rb
# @@ -0,0 +1,184 @@

# Licensed to the Apache
# Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks what Arrow::StructArrayBuilder stores in the two child field arrays
# (visible: boolean, count: uint64) for each accepted kind of input.
class StructArrayBuilderTest < Test::Unit::TestCase
  def setup
    fields = {
      visible: {type: :boolean},
      count: {type: :uint64},
    }
    @data_type = Arrow::StructDataType.new(**fields)
    @builder = Arrow::StructArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    # After appending nil the child arrays read back as false and 0.
    test("nil") do
      @builder.append_value(nil)
      expected = [
        [false],
        [0],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    # Array elements map to fields by position; missing positions read nil.
    test("Array") do
      @builder.append_value([true, 1])
      @builder.append_value([])
      @builder.append_value([false])
      expected = [
        [true, nil, false],
        [1, nil, nil],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    # A struct value taken from an existing StructArray can be appended as is.
    test("Arrow::Struct") do
      source = Arrow::StructArray.new(@data_type, [[true, 1]])
      struct = source.get_value(0)
      @builder.append_value(struct)
      expected = [
        [true],
        [1],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    # Hash keys map to fields by name; absent keys read nil.
    test("Hash") do
      @builder.append_value(count: 1, visible: true)
      @builder.append_value(visible: false)
      @builder.append_value(count: 2)
      expected = [
        [true, false, nil],
        [1, nil, 2],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end
  end

  sub_test_case("#append_values") do
    test("[nil]") do
      @builder.append_values([nil])
      expected = [
        [false],
        [0],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    test("[Array]") do
      @builder.append_values([[true, 1]])
      expected = [
        [true],
        [1],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    test("[Hash]") do
      @builder.append_values([{count: 1, visible: true}])
      expected = [
        [true],
        [1],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    # nil, Array and Hash elements may be mixed in a single call.
    test("[nil, Array, Hash]") do
      values = [
        nil,
        [true, 1],
        {count: 2, visible: false},
      ]
      @builder.append_values(values)
      expected = [
        [false, true, false],
        [0, 1, 2],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end

    # For the element flagged false in the second argument, the child
    # arrays read back false and 0 rather than the supplied [false, 2].
    test("is_valids") do
      values = [
        [true, 1],
        [false, 2],
        [true, 3],
      ]
      is_valids = [
        true,
        false,
        true,
      ]
      @builder.append_values(values, is_valids)
      expected = [
        [true, false, true],
        [1, 0, 3],
      ]
      assert_equal(expected, field_values(@builder.finish))
    end
  end

  sub_test_case("#append") do
    # Argument-less #append followed by appends on each field builder.
    test("backward compatibility") do
      [[true, 1], [false, 2]].each do |visible, count|
        @builder.append
        @builder.get_field_builder(0).append(visible)
        @builder.get_field_builder(1).append(count)
      end
      array = @builder.finish
      expected = [
        {"visible" => true, "count" => 1},
        {"visible" => false, "count" => 2},
      ]
      actual = [
        array.get_value(0),
        array.get_value(1),
      ]
      assert_equal(expected, actual)
    end
  end

  private

  # Returns the contents of child fields 0 and 1 of +array+ as Ruby Arrays.
  def field_values(array)
    [
      array.find_field(0).to_a,
      array.find_field(1).to_a,
    ]
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-struct-array.rb b/src/arrow/ruby/red-arrow/test/test-struct-array.rb
# new file mode 100644
# index 000000000..2c01f33ef
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-struct-array.rb
# @@ -0,0 +1,94 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks construction of Arrow::StructArray from Ruby values and the
# accessors that read structs or child fields back out.
class StructArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # The nil row reads back as false and 0 in the child arrays.
    test("build") do
      data_type = Arrow::StructDataType.new(visible: :boolean,
                                            count: :uint64)
      rows = [
        [true, 1],
        nil,
        [false, 2],
      ]
      array = Arrow::StructArray.new(data_type, rows)
      expected = [
        [true, false, false],
        [1, 0, 2],
      ]
      assert_equal(expected, fields_to_a(array, 0, 1))
    end
  end

  sub_test_case("instance methods") do
    def setup
      fields = {
        visible: {type: :boolean},
        count: {type: :uint64},
      }
      @data_type = Arrow::StructDataType.new(**fields)
      @values = [
        [true, 1],
        [false, 2],
      ]
      @array = Arrow::StructArray.new(@data_type, @values)
    end

    # The assertion goes through #to_a; each element is a Hash keyed by
    # field name.
    test("#[]") do
      expected = [
        {"visible" => true, "count" => 1},
        {"visible" => false, "count" => 2},
      ]
      assert_equal(expected, @array.to_a)
    end

    test("#get_value") do
      expected = [
        {"visible" => true, "count" => 1},
        {"visible" => false, "count" => 2},
      ]
      actual = [
        @array.get_value(0),
        @array.get_value(1),
      ]
      assert_equal(expected, actual)
    end

    sub_test_case("#find_field") do
      # Lookup by position.
      test("Integer") do
        expected = [
          [true, false],
          [1, 2],
        ]
        assert_equal(expected, fields_to_a(@array, 0, 1))
      end

      # Lookup by name, once as a String and once as a Symbol.
      test("String, Symbol") do
        expected = [
          [true, false],
          [1, 2],
        ]
        assert_equal(expected, fields_to_a(@array, "visible", :count))
      end
    end
  end

  private

  # Looks up each of +keys+ with #find_field on +array+, in order, and
  # returns the contents of the found fields as Ruby Arrays.
  def fields_to_a(array, *keys)
    keys.map { |key| array.find_field(key).to_a }
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-struct-data-type.rb b/src/arrow/ruby/red-arrow/test/test-struct-data-type.rb
# new file mode 100644
# index 000000000..d106e38b1
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-struct-data-type.rb
# @@ -0,0 +1,112 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks the field descriptions accepted by Arrow::StructDataType.new and
# the field lookup offered by Arrow::StructDataType#[].
class StructDataTypeTest < Test::Unit::TestCase
  # Two ready-made fields shared by the tests below.
  def setup
    @count_field = Arrow::Field.new("count", :uint32)
    @visible_field = Arrow::Field.new("visible", :boolean)
  end

  sub_test_case(".new") do
    # Array of Arrow::Field objects.
    test("[Arrow::Field]") do
      fields = [
        @count_field,
        @visible_field,
      ]
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end

    # Array of Hash descriptions using the :name / :data_type keys.
    test("[Hash]") do
      fields = [
        {name: "count", data_type: :uint32},
        {name: "visible", data_type: :boolean},
      ]
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end

    # Field objects and Hash descriptions may be mixed in one Array.
    test("[Arrow::Field, Hash]") do
      fields = [
        @count_field,
        {name: "visible", data_type: :boolean},
      ]
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end

    # Hash from field name to an Arrow::DataType instance.
    test("{Arrow::DataType}") do
      fields = {
        "count" => Arrow::UInt32DataType.new,
        "visible" => Arrow::BooleanDataType.new,
      }
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end

    # Hash from field name to a Hash description using the :type key.
    test("{Hash}") do
      fields = {
        "count" => {type: :uint32},
        "visible" => {type: :boolean},
      }
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end

    # Hash from field name to a type name given as a String or a Symbol.
    test("{String, Symbol}") do
      fields = {
        "count" => "uint32",
        "visible" => :boolean,
      }
      assert_equal("struct<count: uint32, visible: bool>",
                   Arrow::StructDataType.new(fields).to_s)
    end
  end

  sub_test_case("instance methods") do
    # Runs the outer setup first so the two fields exist, then builds the
    # data type under test from them.
    def setup
      super
      @data_type = Arrow::StructDataType.new([@count_field, @visible_field])
    end

    sub_test_case("#[]") do
      test("[String]") do
        assert_equal([@count_field, @visible_field],
                     [@data_type["count"], @data_type["visible"]])
      end

      test("[Symbol]") do
        assert_equal([@count_field, @visible_field],
                     [@data_type[:count], @data_type[:visible]])
      end

      test("[Integer]") do
        assert_equal([@count_field, @visible_field],
                     [@data_type[0], @data_type[1]])
      end

      # Any other key type must raise ArgumentError with this exact message.
      test("[invalid]") do
        invalid = []
        # Built in one expression instead of appending to a string literal
        # with <<: mutating a literal raises FrozenError under
        # `frozen_string_literal: true` and is deprecated as of Ruby 3.4.
        message =
          "field name or index must be String, Symbol or Integer" \
          ": <#{invalid.inspect}>"
        assert_raise(ArgumentError.new(message)) do
          @data_type[invalid]
        end
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/test-table.rb b/src/arrow/ruby/red-arrow/test/test-table.rb
# new file mode 100644
# index 000000000..78361a824
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-table.rb
# @@ -0,0 +1,925 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +class TableTest < Test::Unit::TestCase + include Helper::Fixture + + def setup + @count_field = Arrow::Field.new("count", :uint8) + @visible_field = Arrow::Field.new("visible", :boolean) + schema = Arrow::Schema.new([@count_field, @visible_field]) + count_arrays = [ + Arrow::UInt8Array.new([1, 2]), + Arrow::UInt8Array.new([4, 8, 16]), + Arrow::UInt8Array.new([32, 64]), + Arrow::UInt8Array.new([128]), + ] + visible_arrays = [ + Arrow::BooleanArray.new([true, false, nil]), + Arrow::BooleanArray.new([true]), + Arrow::BooleanArray.new([true, false]), + Arrow::BooleanArray.new([nil]), + Arrow::BooleanArray.new([nil]), + ] + @count_array = Arrow::ChunkedArray.new(count_arrays) + @visible_array = Arrow::ChunkedArray.new(visible_arrays) + @table = Arrow::Table.new(schema, [@count_array, @visible_array]) + end + + test("#columns") do + assert_equal([ + Arrow::Column.new(@table, 0), + Arrow::Column.new(@table, 1), + ], + @table.columns) + end + + sub_test_case("#slice") do + test("Arrow::BooleanArray") do + target_rows_raw = [nil, true, true, false, true, false, true, true] + target_rows = Arrow::BooleanArray.new(target_rows_raw) + assert_equal(<<-TABLE, @table.slice(target_rows).to_s) + count visible +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) + TABLE + end + + test("Array: boolean") do + target_rows_raw = [nil, true, true, false, true, false, true, true] + assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s) + count visible +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) + TABLE + end + + test("Integer: positive") do + assert_equal({"count" => 128, "visible" => nil}, + @table.slice(@table.n_rows - 1).to_h) + end + + test("Integer: negative") do + assert_equal({"count" => 1, "visible" => true}, + @table.slice(-@table.n_rows).to_h) + end + + test("Integer: out of index") do + assert_equal([ + nil, + nil, + ], + [ + @table.slice(@table.n_rows), + @table.slice(-(@table.n_rows + 1)), + ]) + end + + 
test("Range: positive: include end") do + assert_equal(<<-TABLE, @table.slice(2..4).to_s) + count visible +0 4 (null) +1 8 true +2 16 true + TABLE + end + + test("Range: positive: exclude end") do + assert_equal(<<-TABLE, @table.slice(2...4).to_s) + count visible +0 4 (null) +1 8 true + TABLE + end + + test("Range: negative: include end") do + assert_equal(<<-TABLE, @table.slice(-4..-2).to_s) + count visible +0 16 true +1 32 false +2 64 (null) + TABLE + end + + test("Range: negative: exclude end") do + assert_equal(<<-TABLE, @table.slice(-4...-2).to_s) + count visible +0 16 true +1 32 false + TABLE + end + + test("[from, to]: positive") do + assert_equal(<<-TABLE, @table.slice(0, 2).to_s) + count visible +0 1 true +1 2 false + TABLE + end + + test("[from, to]: negative") do + assert_equal(<<-TABLE, @table.slice(-4, 2).to_s) + count visible +0 16 true +1 32 false + TABLE + end + + test("{key: Number}") do + assert_equal(<<-TABLE, @table.slice(count: 16).to_s) + count visible +0 16 true + TABLE + end + + test("{key: String}") do + table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"])) + assert_equal(<<-TABLE, table.slice(name: 'b').to_s) + name +0 b + TABLE + end + + test("{key: true}") do + assert_equal(<<-TABLE, @table.slice(visible: true).to_s) + count visible +0 1 true +1 (null) (null) +2 8 true +3 16 true +4 (null) (null) +5 (null) (null) + TABLE + end + + test("{key: false}") do + assert_equal(<<-TABLE, @table.slice(visible: false).to_s) + count visible +0 2 false +1 (null) (null) +2 32 false +3 (null) (null) +4 (null) (null) + TABLE + end + + test("{key: Range}: beginless include end") do + assert_equal(<<-TABLE, @table.slice(count: ..8).to_s) + count visible +0 1 true +1 2 false +2 4 (null) +3 8 true + TABLE + end + + test("{key: Range}: beginless exclude end") do + assert_equal(<<-TABLE, @table.slice(count: ...8).to_s) + count visible +0 1 true +1 2 false +2 4 (null) + TABLE + end + + test("{key: Range}: endless") do + assert_equal(<<-TABLE, 
@table.slice(count: 16..).to_s) + count visible +0 16 true +1 32 false +2 64 (null) +3 128 (null) + TABLE + end + + test("{key: Range}: include end") do + assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s) + count visible +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true + TABLE + end + + test("{key: Range}: exclude end") do + assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s) + count visible +0 1 true +1 2 false +2 4 (null) +3 8 true + TABLE + end + + test("{key1: Range, key2: true}") do + assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s) + count visible +0 2 false +1 (null) (null) +2 (null) (null) +3 (null) (null) + TABLE + end + + sub_test_case("wrong argument") do + test("no arguments") do + message = "wrong number of arguments (given 0, expected 1..2)" + assert_raise(ArgumentError.new(message)) do + @table.slice + end + end + + test("too many arguments") do + message = "wrong number of arguments (given 3, expected 1..2)" + assert_raise(ArgumentError.new(message)) do + @table.slice(1, 2, 3) + end + end + + test("arguments: with block") do + message = "must not specify both arguments and block" + assert_raise(ArgumentError.new(message)) do + @table.slice(1, 2) {} + end + end + + test("offset: too small") do + n_rows = @table.n_rows + offset = -(n_rows + 1) + message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}" + assert_raise(ArgumentError.new(message)) do + @table.slice(offset, 1) + end + end + + test("offset: too large") do + n_rows = @table.n_rows + offset = n_rows + message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}" + assert_raise(ArgumentError.new(message)) do + @table.slice(offset, 1) + end + end + end + end + + sub_test_case("#[]") do + def setup + @table = Arrow::Table.new(a: [true], + b: [true], + c: [true], + d: [true], + e: [true], + f: [true], + g: [true]) + end + + test("[String]") do + assert_equal(Arrow::Column.new(@table, 0), + @table["a"]) + end + + test("[Symbol]") do 
+ assert_equal(Arrow::Column.new(@table, 1), + @table[:b]) + end + + test("[Integer]") do + assert_equal(Arrow::Column.new(@table, 6), + @table[-1]) + end + + test("[Range]") do + assert_equal(Arrow::Table.new(d: [true], + e: [true]), + @table[3..4]) + end + + test("[[Symbol, String, Integer, Range]]") do + assert_equal(Arrow::Table.new(c: [true], + a: [true], + g: [true], + d: [true], + e: [true]), + @table[[:c, "a", -1, 3..4]]) + end + end + + sub_test_case("#merge") do + sub_test_case("Hash") do + test("add") do + name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) + assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s) + count visible name +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h + TABLE + end + + test("remove") do + assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s) + count +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 + TABLE + end + + test("replace") do + visible_array = Arrow::Int32Array.new([1] * @visible_array.length) + assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s) + count visible +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 + TABLE + end + end + + sub_test_case("Arrow::Table") do + test("add") do + name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) + table = Arrow::Table.new("name" => name_array) + assert_equal(<<-TABLE, @table.merge(table).to_s) + count visible name +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h + TABLE + end + + test("replace") do + visible_array = Arrow::Int32Array.new([1] * @visible_array.length) + table = Arrow::Table.new("visible" => visible_array) + assert_equal(<<-TABLE, @table.merge(table).to_s) + count visible +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 + TABLE + end + end + end + + test("column name getter") do + assert_equal(Arrow::Column.new(@table, 1), + @table.visible) + 
end + + sub_test_case("#remove_column") do + test("String") do + assert_equal(<<-TABLE, @table.remove_column("visible").to_s) + count +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 + TABLE + end + + test("Symbol") do + assert_equal(<<-TABLE, @table.remove_column(:visible).to_s) + count +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 + TABLE + end + + test("unknown column name") do + assert_raise(KeyError) do + @table.remove_column(:nonexistent) + end + end + + test("Integer") do + assert_equal(<<-TABLE, @table.remove_column(1).to_s) + count +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 + TABLE + end + + test("negative integer") do + assert_equal(<<-TABLE, @table.remove_column(-1).to_s) + count +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 + TABLE + end + + test("too small index") do + assert_raise(IndexError) do + @table.remove_column(-3) + end + end + + test("too large index") do + assert_raise(IndexError) do + @table.remove_column(2) + end + end + end + + sub_test_case("#select_columns") do + def setup + raw_table = { + :a => Arrow::UInt8Array.new([1]), + :b => Arrow::UInt8Array.new([1]), + :c => Arrow::UInt8Array.new([1]), + :d => Arrow::UInt8Array.new([1]), + :e => Arrow::UInt8Array.new([1]), + } + @table = Arrow::Table.new(raw_table) + end + + test("names") do + assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s) + c a +0 1 1 + TABLE + end + + test("range") do + assert_equal(<<-TABLE, @table.select_columns(2...4).to_s) + c d +0 1 1 + TABLE + end + + test("indexes") do + assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s) + a e c +0 1 1 1 + TABLE + end + + test("mixed") do + assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s) + a e c d +0 1 1 1 1 + TABLE + end + + test("block") do + selected_table = @table.select_columns.with_index do |column, i| + column.name == "a" or i.odd? 
+ end + assert_equal(<<-TABLE, selected_table.to_s) + a b d +0 1 1 1 + TABLE + end + + test("names, indexes and block") do + selected_table = @table.select_columns(:a, -1) do |column| + column.name == "a" + end + assert_equal(<<-TABLE, selected_table.to_s) + a +0 1 + TABLE + end + end + + sub_test_case("#save and .load") do + module SaveLoadFormatTests + def test_default + output = create_output(".arrow") + @table.save(output) + assert_equal(@table, Arrow::Table.load(output)) + end + + def test_arrow_file + output = create_output(".arrow") + @table.save(output, format: :arrow_file) + assert_equal(@table, Arrow::Table.load(output, format: :arrow_file)) + end + + def test_batch + output = create_output(".arrow") + @table.save(output, format: :batch) + assert_equal(@table, Arrow::Table.load(output, format: :batch)) + end + + def test_arrow_streaming + output = create_output(".arrow") + @table.save(output, format: :arrow_streaming) + assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming)) + end + + def test_stream + output = create_output(".arrow") + @table.save(output, format: :stream) + assert_equal(@table, Arrow::Table.load(output, format: :stream)) + end + + def test_csv + output = create_output(".csv") + @table.save(output, format: :csv) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + schema: @table.schema)) + end + + def test_csv_gz + output = create_output(".csv.gz") + @table.save(output, + format: :csv, + compression: :gzip) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + compression: :gzip, + schema: @table.schema)) + end + + def test_tsv + output = create_output(".tsv") + @table.save(output, format: :tsv) + assert_equal(@table, + Arrow::Table.load(output, + format: :tsv, + schema: @table.schema)) + end + end + + sub_test_case("path") do + sub_test_case(":format") do + include SaveLoadFormatTests + + def create_output(extension) + @file = Tempfile.new(["red-arrow", extension]) + @file.path + end + + 
sub_test_case("save: auto detect") do + test("csv") do + output = create_output(".csv") + @table.save(output) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + schema: @table.schema)) + end + + test("csv.gz") do + output = create_output(".csv.gz") + @table.save(output) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + compression: :gzip, + schema: @table.schema)) + end + + test("tsv") do + output = create_output(".tsv") + @table.save(output) + assert_equal(@table, + Arrow::Table.load(output, + format: :tsv, + schema: @table.schema)) + end + end + + sub_test_case("load: auto detect") do + test("arrow: file") do + output = create_output(".arrow") + @table.save(output, format: :arrow_file) + assert_equal(@table, Arrow::Table.load(output)) + end + + test("arrow: streaming") do + output = create_output(".arrow") + @table.save(output, format: :arrow_streaming) + assert_equal(@table, Arrow::Table.load(output)) + end + + test("csv") do + path = fixture_path("with-header.csv") + table = Arrow::Table.load(path, skip_lines: /^\#/) + assert_equal(<<-TABLE, table.to_s) + name score +0 alice 10 +1 bob 29 +2 chris -1 + TABLE + end + + test("csv.gz") do + file = Tempfile.new(["red-arrow", ".csv.gz"]) + file.close + Zlib::GzipWriter.open(file.path) do |gz| + gz.write(<<-CSV) +name,score +alice,10 +bob,29 +chris,-1 + CSV + end + assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s) + name score +0 alice 10 +1 bob 29 +2 chris -1 + TABLE + end + + test("tsv") do + file = Tempfile.new(["red-arrow", ".tsv"]) + file.puts(<<-TSV) +name\tscore +alice\t10 +bob\t29 +chris\t-1 + TSV + file.close + table = Arrow::Table.load(file.path) + assert_equal(<<-TABLE, table.to_s) + name score +0 alice 10 +1 bob 29 +2 chris -1 + TABLE + end + end + end + end + + sub_test_case("Buffer") do + sub_test_case(":format") do + include SaveLoadFormatTests + + def create_output(extension) + Arrow::ResizableBuffer.new(1024) + end + end + end + end + + test("#pack") do + 
packed_table = @table.pack + column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks} + assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s]) + count visible +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true +5 32 false +6 64 (null) +7 128 (null) + TABLE + end + + sub_test_case("#to_s") do + sub_test_case(":format") do + def setup + columns = { + "count" => Arrow::UInt8Array.new([1, 2]), + "visible" => Arrow::BooleanArray.new([true, false]), + } + @table = Arrow::Table.new(columns) + end + + test(":column") do + assert_equal(<<-TABLE, @table.to_s(format: :column)) +count: uint8 +visible: bool +---- +count: + [ + [ + 1, + 2 + ] + ] +visible: + [ + [ + true, + false + ] + ] + TABLE + end + + test(":list") do + assert_equal(<<-TABLE, @table.to_s(format: :list)) +==================== 0 ==================== +count: 1 +visible: true +==================== 1 ==================== +count: 2 +visible: false + TABLE + end + + test(":table") do + assert_equal(<<-TABLE, @table.to_s(format: :table)) + count visible +0 1 true +1 2 false + TABLE + end + + test("invalid") do + message = ":format must be :column, :list, :table or nil: <:invalid>" + assert_raise(ArgumentError.new(message)) do + @table.to_s(format: :invalid) + end + end + end + + sub_test_case("#==") do + test("Arrow::Table") do + assert do + @table == @table + end + end + + test("not Arrow::Table") do + assert do + not (@table == 29) + end + end + end + end + + sub_test_case("#filter") do + def setup + super + @options = Arrow::FilterOptions.new + @options.null_selection_behavior = :emit_null + end + + test("Array: boolean") do + filter = [nil, true, true, false, true, false, true, true] + assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) + count visible +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) + TABLE + end + + test("Arrow::BooleanArray") do + array = [nil, true, true, false, true, false, true, true] + filter = 
Arrow::BooleanArray.new(array) + assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) + count visible +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) + TABLE + end + + test("Arrow::ChunkedArray") do + filter_chunks = [ + Arrow::BooleanArray.new([nil, true, true]), + Arrow::BooleanArray.new([false, true, false]), + Arrow::BooleanArray.new([true, true]), + ] + filter = Arrow::ChunkedArray.new(filter_chunks) + assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) + count visible +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) + TABLE + end + end + + sub_test_case("#take") do + test("Arrow: boolean") do + indices = [1, 0, 2] + assert_equal(<<-TABLE, @table.take(indices).to_s) + count visible +0 2 false +1 1 true +2 4 (null) + TABLE + end + + test("Arrow::Array") do + indices = Arrow::Int16Array.new([1, 0, 2]) + assert_equal(<<-TABLE, @table.take(indices).to_s) + count visible +0 2 false +1 1 true +2 4 (null) + TABLE + end + + test("Arrow::ChunkedArray") do + chunks = [ + Arrow::Int16Array.new([1, 0]), + Arrow::Int16Array.new([2]) + ] + indices = Arrow::ChunkedArray.new(chunks) + assert_equal(<<-TABLE, @table.take(indices).to_s) + count visible +0 2 false +1 1 true +2 4 (null) + TABLE + end + end + + sub_test_case("#concatenate") do + test("options: :unify_schemas") do + table1 = Arrow::Table.new(a: [true], + b: [false]) + table2 = Arrow::Table.new(b: [false]) + concatenated = table1.concatenate([table2], unify_schemas: true) + assert_equal(<<-TABLE, concatenated.to_s) + a b +0 true false +1 (null) false + TABLE + end + end +end diff --git a/src/arrow/ruby/red-arrow/test/test-tensor.rb b/src/arrow/ruby/red-arrow/test/test-tensor.rb new file mode 100644 index 000000000..ffa1e3241 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/test-tensor.rb @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Equality checks for Arrow::Tensor.
class TensorTest < Test::Unit::TestCase
  sub_test_case("instance methods") do
    # Builds @tensor: an Int8 tensor with shape [3, 2, 2] over the packed
    # values 1..12, an empty strides list and the names "a", "b", "c".
    def setup
      packed_values = (1..12).to_a.pack("c*")
      buffer = Arrow::Buffer.new(packed_values)
      @tensor = Arrow::Tensor.new(Arrow::Int8DataType.new,
                                  buffer,
                                  [3, 2, 2],
                                  [],
                                  ["a", "b", "c"])
    end

    sub_test_case("#==") do
      # A tensor compares equal to itself.
      test("Arrow::Tensor") do
        assert { @tensor == @tensor }
      end

      # Comparing against a non-tensor value yields a falsy result.
      test("not Arrow::Tensor") do
        assert { !(@tensor == 29) }
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-time.rb b/src/arrow/ruby/red-arrow/test/test-time.rb
# new file mode 100644
# index 000000000..37c098c69
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-time.rb
# @@ -0,0 +1,288 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::Time: equality, unit casting, float conversion,
# sign predicates, component readers and string formatting.
class TimeTest < Test::Unit::TestCase
  sub_test_case("#==") do
    test("same unit") do
      assert { Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 10) }
    end

    # 10 seconds is expected to equal 10 * 1000 milliseconds.
    test("different unit") do
      assert { Arrow::Time.new(:second, 10) == Arrow::Time.new(:milli, 10 * 1000) }
    end

    test("false") do
      assert { !(Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 11)) }
    end
  end

  # Each test casts a source time to a target unit and compares the
  # resulting [unit, value] pair with the expected pair.
  sub_test_case("#cast") do
    test("same unit") do
      source = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      converted = source.cast(Arrow::TimeUnit::SECOND)
      expected = [source.unit, source.value]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("second -> milli") do
      source = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      converted = source.cast(Arrow::TimeUnit::MILLI)
      expected = [Arrow::TimeUnit::MILLI, source.value * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("second -> micro") do
      source = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      converted = source.cast(Arrow::TimeUnit::MICRO)
      expected = [Arrow::TimeUnit::MICRO, source.value * 1000 * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("second -> nano") do
      source = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      converted = source.cast(Arrow::TimeUnit::NANO)
      expected = [Arrow::TimeUnit::NANO, source.value * 1000 * 1000 * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200 ms is expected to become 10 s.
    test("milli -> second") do
      source = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
      converted = source.cast(Arrow::TimeUnit::SECOND)
      expected = [Arrow::TimeUnit::SECOND, 10]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("milli -> micro") do
      source = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
      converted = source.cast(Arrow::TimeUnit::MICRO)
      expected = [Arrow::TimeUnit::MICRO, source.value * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("milli -> nano") do
      source = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
      converted = source.cast(Arrow::TimeUnit::NANO)
      expected = [Arrow::TimeUnit::NANO, source.value * 1000 * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200_300 us is expected to become 10 s.
    test("micro -> second") do
      source = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
      converted = source.cast(Arrow::TimeUnit::SECOND)
      expected = [Arrow::TimeUnit::SECOND, 10]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200_300 us is expected to become 10_200 ms.
    test("micro -> milli") do
      source = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
      converted = source.cast(Arrow::TimeUnit::MILLI)
      expected = [Arrow::TimeUnit::MILLI, 10_200]
      assert_equal(expected, [converted.unit, converted.value])
    end

    test("micro -> nano") do
      source = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
      converted = source.cast(Arrow::TimeUnit::NANO)
      expected = [Arrow::TimeUnit::NANO, source.value * 1000]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200_300_400 ns is expected to become 10 s.
    test("nano -> second") do
      source = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
      converted = source.cast(Arrow::TimeUnit::SECOND)
      expected = [Arrow::TimeUnit::SECOND, 10]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200_300_400 ns is expected to become 10_200 ms.
    test("nano -> milli") do
      source = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
      converted = source.cast(Arrow::TimeUnit::MILLI)
      expected = [Arrow::TimeUnit::MILLI, 10_200]
      assert_equal(expected, [converted.unit, converted.value])
    end

    # 10_200_300_400 ns is expected to become 10_200_300 us.
    test("nano -> micro") do
      source = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
      converted = source.cast(Arrow::TimeUnit::MICRO)
      expected = [Arrow::TimeUnit::MICRO, 10_200_300]
      assert_equal(expected, [converted.unit, converted.value])
    end
  end

  # #to_f is compared with assert_in_delta using its default delta.
  sub_test_case("#to_f") do
    test("second") do
      seconds = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10).to_f
      assert_in_delta(10.0, seconds)
    end

    test("milli") do
      seconds = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200).to_f
      assert_in_delta(10.2, seconds)
    end

    test("micro") do
      seconds = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300).to_f
      assert_in_delta(10.2003, seconds)
    end

    test("nano") do
      seconds = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400).to_f
      assert_in_delta(10.2003004, seconds)
    end
  end

  sub_test_case("#positive?") do
    test("true") do
      ten_seconds = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      assert { ten_seconds.positive? }
    end

    test("false") do
      minus_ten_seconds = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
      assert { !minus_ten_seconds.positive? }
    end
  end

  sub_test_case("#negative?") do
    test("true") do
      minus_ten_seconds = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
      assert { minus_ten_seconds.negative? }
    end

    test("false") do
      ten_seconds = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
      assert { !ten_seconds.negative? }
    end
  end

  # The next three tests read components of 5 h 12 min 10 s given in seconds.
  test("#hour") do
    total_seconds = (5 * 60 * 60) + (12 * 60) + 10
    assert_equal(5, Arrow::Time.new(Arrow::TimeUnit::SECOND, total_seconds).hour)
  end

  test("#minute") do
    total_seconds = (5 * 60 * 60) + (12 * 60) + 10
    assert_equal(12, Arrow::Time.new(Arrow::TimeUnit::SECOND, total_seconds).minute)
  end

  test("#second") do
    total_seconds = (5 * 60 * 60) + (12 * 60) + 10
    assert_equal(10, Arrow::Time.new(Arrow::TimeUnit::SECOND, total_seconds).second)
  end

  test("#nano_second") do
    assert_equal(1234, Arrow::Time.new(Arrow::TimeUnit::NANO, 1234).nano_second)
  end

  # A negative nanosecond value is expected to format with a leading "-".
  test("#to_s") do
    total_seconds = (5 * 60 * 60) + (12 * 60) + 10
    nanoseconds = -(total_seconds * 1_000_000_000 + 1234)
    formatted = Arrow::Time.new(Arrow::TimeUnit::NANO, nanoseconds).to_s
    assert_equal("-05:12:10.000001234", formatted)
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-time32-array.rb b/src/arrow/ruby/red-arrow/test/test-time32-array.rb
# new file mode 100644
# index 000000000..b8bb4eb94
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-time32-array.rb
# @@ -0,0 +1,81 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Construction tests for Arrow::Time32Array.new: the first argument may be
# a time unit or a data type, and the values may be integers or Arrow::Time.
class Time32ArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    sub_test_case("unit") do
      test("Arrow::TimeUnit") do
        raw_values = [1000 * 10, nil]
        array = Arrow::Time32Array.new(Arrow::TimeUnit::MILLI, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::MILLI, 1000 * 10),
          nil,
        ]
        assert_equal(["time32[ms]", expected_times],
                     [array.value_data_type.to_s, array.to_a])
      end

      test("Symbol") do
        raw_values = [60 * 10, nil]
        array = Arrow::Time32Array.new(:second, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::SECOND, 60 * 10),
          nil,
        ]
        assert_equal(["time32[s]", expected_times],
                     [array.value_data_type.to_s, array.to_a])
      end
    end

    sub_test_case("values") do
      # Arrow::Time inputs are expected to come back unchanged from #to_a.
      test("Arrow::Time") do
        type = Arrow::Time32DataType.new(:second)
        times = [
          Arrow::Time.new(Arrow::TimeUnit::SECOND, 60 * 10),
          nil,
        ]
        array = Arrow::Time32Array.new(type, times)
        assert_equal(times, array.to_a)
      end

      # Integer inputs are expected to come back as Arrow::Time in seconds.
      test("Integer") do
        type = Arrow::Time32DataType.new(:second)
        raw_values = [60 * 10, nil]
        array = Arrow::Time32Array.new(type, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::SECOND, 60 * 10),
          nil,
        ]
        assert_equal(expected_times, array.to_a)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-time32-data-type.rb b/src/arrow/ruby/red-arrow/test/test-time32-data-type.rb
# new file mode 100644
# index 000000000..26f17359a
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-time32-data-type.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Construction tests for Arrow::Time32DataType.new: the unit may be passed
# positionally or as the unit: keyword, as an Arrow::TimeUnit or a Symbol.
class Time32DataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Arrow::TimeUnit") do
      data_type = Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
      assert_equal("time32[ms]", data_type.to_s)
    end

    test("Symbol") do
      data_type = Arrow::Time32DataType.new(:milli)
      assert_equal("time32[ms]", data_type.to_s)
    end

    test("unit: Arrow::TimeUnit") do
      description = Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI).to_s
      assert_equal("time32[ms]", description)
    end

    test("unit: Symbol") do
      description = Arrow::Time32DataType.new(unit: :milli).to_s
      assert_equal("time32[ms]", description)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-time64-array.rb b/src/arrow/ruby/red-arrow/test/test-time64-array.rb
# new file mode 100644
# index 000000000..831af1e35
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-time64-array.rb
# @@ -0,0 +1,81 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Construction tests for Arrow::Time64Array.new: the first argument may be
# a time unit or a data type, and the values may be integers or Arrow::Time.
class Time64ArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    sub_test_case("unit") do
      test("Arrow::TimeUnit") do
        raw_values = [1000 * 10, nil]
        array = Arrow::Time64Array.new(Arrow::TimeUnit::NANO, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::NANO, 1000 * 10),
          nil,
        ]
        assert_equal(["time64[ns]", expected_times],
                     [array.value_data_type.to_s, array.to_a])
      end

      test("Symbol") do
        raw_values = [1000 * 10, nil]
        array = Arrow::Time64Array.new(:micro, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::MICRO, 1000 * 10),
          nil,
        ]
        assert_equal(["time64[us]", expected_times],
                     [array.value_data_type.to_s, array.to_a])
      end
    end

    sub_test_case("values") do
      # Arrow::Time inputs are expected to come back unchanged from #to_a.
      test("Arrow::Time") do
        type = Arrow::Time64DataType.new(:nano)
        times = [
          Arrow::Time.new(Arrow::TimeUnit::NANO, 1000 * 10),
          nil,
        ]
        array = Arrow::Time64Array.new(type, times)
        assert_equal(times, array.to_a)
      end

      # Integer inputs are expected to come back as Arrow::Time in nanoseconds.
      test("Integer") do
        type = Arrow::Time64DataType.new(:nano)
        raw_values = [1000 * 10, nil]
        array = Arrow::Time64Array.new(type, raw_values)
        expected_times = [
          Arrow::Time.new(Arrow::TimeUnit::NANO, 1000 * 10),
          nil,
        ]
        assert_equal(expected_times, array.to_a)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-time64-data-type.rb b/src/arrow/ruby/red-arrow/test/test-time64-data-type.rb
# new file mode 100644
# index 000000000..a5f341753
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-time64-data-type.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Construction tests for Arrow::Time64DataType.new: the unit may be passed
# positionally or as the unit: keyword, as an Arrow::TimeUnit or a Symbol.
class Time64DataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Arrow::TimeUnit") do
      data_type = Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
      assert_equal("time64[ns]", data_type.to_s)
    end

    test("Symbol") do
      data_type = Arrow::Time64DataType.new(:nano)
      assert_equal("time64[ns]", data_type.to_s)
    end

    test("unit: Arrow::TimeUnit") do
      description = Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO).to_s
      assert_equal("time64[ns]", description)
    end

    test("unit: Symbol") do
      description = Arrow::Time64DataType.new(unit: :nano).to_s
      assert_equal("time64[ns]", description)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-timestamp-array.rb b/src/arrow/ruby/red-arrow/test/test-timestamp-array.rb
# new file mode 100644
# index 000000000..248a2531e
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-timestamp-array.rb
# @@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::TimestampArray element access and #is_in.
class TimestampArrayTest < Test::Unit::TestCase
  # A microsecond timestamp built from seconds and microseconds is expected
  # to read back as the Time created from the same two parts.
  test("#[]") do
    seconds = 1513267750
    microseconds = 914509
    raw_timestamp = seconds * (10 ** 6) + microseconds
    array = Arrow::TimestampArray.new(:micro, [raw_timestamp])
    assert_equal(Time.at(seconds, microseconds), array[0])
  end

  sub_test_case("#is_in") do
    # Builds @array from three timestamps one second apart.
    def setup
      times = [
        "2019-11-18T00:09:11",
        "2019-11-18T00:09:12",
        "2019-11-18T00:09:13",
      ].map { |text| Time.parse(text) }
      @array = Arrow::TimestampArray.new(:micro, times)
    end

    # Only the middle element matches the single candidate time.
    test("Arrow: Array") do
      candidates = [Time.parse("2019-11-18T00:09:12")]
      expected = Arrow::BooleanArray.new([false, true, false])
      assert_equal(expected, @array.is_in(candidates))
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/test/test-timestamp-data-type.rb b/src/arrow/ruby/red-arrow/test/test-timestamp-data-type.rb
# new file mode 100644
# index 000000000..f8ccd3d8b
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/test-timestamp-data-type.rb
# @@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Exercises the argument forms accepted by Arrow::TimestampDataType.new:
# a positional Arrow::TimeUnit, a positional Symbol, and the same two
# passed through the +unit:+ keyword.  Every form is expected to render
# as "timestamp[ms]".
class TimestampDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Arrow::TimeUnit") do
      milli_type = Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
      assert_equal("timestamp[ms]", milli_type.to_s)
    end

    test("Symbol") do
      milli_type = Arrow::TimestampDataType.new(:milli)
      assert_equal("timestamp[ms]", milli_type.to_s)
    end

    test("unit: Arrow::TimeUnit") do
      assert_equal("timestamp[ms]",
                   Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI).to_s)
    end

    test("unit: Symbol") do
      assert_equal("timestamp[ms]",
                   Arrow::TimestampDataType.new(unit: :milli).to_s)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/values/test-basic-arrays.rb b/src/arrow/ruby/red-arrow/test/values/test-basic-arrays.rb
# new file mode 100644
# index 000000000..c54c7f62d
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/values/test-basic-arrays.rb
# @@ -0,0 +1,295 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases for +#values+ on primitive Arrow arrays.
#
# Each case builds an Arrow array from plain Ruby values, passes it
# through the including class's +build(array)+ method, and asserts that
# +#values+ on the result returns the original Ruby values.
module ValuesBasicArraysTests
  def test_null
    assert_values_round_trip([nil] * 4, Arrow::NullArray.new(4))
  end

  def test_boolean
    values = [true, nil, false]
    assert_values_round_trip(values, Arrow::BooleanArray.new(values))
  end

  def test_int8
    values = [-(2 ** 7), nil, (2 ** 7) - 1]
    assert_values_round_trip(values, Arrow::Int8Array.new(values))
  end

  def test_uint8
    values = [0, nil, (2 ** 8) - 1]
    assert_values_round_trip(values, Arrow::UInt8Array.new(values))
  end

  def test_int16
    values = [-(2 ** 15), nil, (2 ** 15) - 1]
    assert_values_round_trip(values, Arrow::Int16Array.new(values))
  end

  def test_uint16
    values = [0, nil, (2 ** 16) - 1]
    assert_values_round_trip(values, Arrow::UInt16Array.new(values))
  end

  def test_int32
    values = [-(2 ** 31), nil, (2 ** 31) - 1]
    assert_values_round_trip(values, Arrow::Int32Array.new(values))
  end

  def test_uint32
    values = [0, nil, (2 ** 32) - 1]
    assert_values_round_trip(values, Arrow::UInt32Array.new(values))
  end

  def test_int64
    values = [-(2 ** 63), nil, (2 ** 63) - 1]
    assert_values_round_trip(values, Arrow::Int64Array.new(values))
  end

  def test_uint64
    values = [0, nil, (2 ** 64) - 1]
    assert_values_round_trip(values, Arrow::UInt64Array.new(values))
  end

  def test_float
    values = [-1.0, nil, 1.0]
    assert_values_round_trip(values, Arrow::FloatArray.new(values))
  end

  def test_double
    values = [-1.0, nil, 1.0]
    assert_values_round_trip(values, Arrow::DoubleArray.new(values))
  end

  def test_binary
    values = ["\x00".b, nil, "\xff".b]
    assert_values_round_trip(values, Arrow::BinaryArray.new(values))
  end

  # Previously misspelled as "test_tring"; renamed to match the
  # "test_string" cases of the other values test modules.
  def test_string
    values = [
      "Ruby",
      nil,
      "\u3042", # U+3042 HIRAGANA LETTER A
    ]
    assert_values_round_trip(values, Arrow::StringArray.new(values))
  end

  def test_date32
    values = [
      Date.new(1960, 1, 1),
      nil,
      Date.new(2017, 8, 23),
    ]
    assert_values_round_trip(values, Arrow::Date32Array.new(values))
  end

  def test_date64
    values = [
      DateTime.new(1960, 1, 1, 2, 9, 30),
      nil,
      DateTime.new(2017, 8, 23, 14, 57, 2),
    ]
    assert_values_round_trip(values, Arrow::Date64Array.new(values))
  end

  def test_timestamp_second
    values = [
      Time.parse("1960-01-01T02:09:30Z"),
      nil,
      Time.parse("2017-08-23T14:57:02Z"),
    ]
    assert_values_round_trip(values,
                             Arrow::TimestampArray.new(:second, values))
  end

  def test_timestamp_milli
    values = [
      Time.parse("1960-01-01T02:09:30.123Z"),
      nil,
      Time.parse("2017-08-23T14:57:02.987Z"),
    ]
    assert_values_round_trip(values,
                             Arrow::TimestampArray.new(:milli, values))
  end

  def test_timestamp_micro
    values = [
      Time.parse("1960-01-01T02:09:30.123456Z"),
      nil,
      Time.parse("2017-08-23T14:57:02.987654Z"),
    ]
    assert_values_round_trip(values,
                             Arrow::TimestampArray.new(:micro, values))
  end

  def test_timestamp_nano
    values = [
      Time.parse("1960-01-01T02:09:30.123456789Z"),
      nil,
      Time.parse("2017-08-23T14:57:02.987654321Z"),
    ]
    assert_values_round_trip(values,
                             Arrow::TimestampArray.new(:nano, values))
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    values = [
      Arrow::Time.new(unit, 60 * 10), # 00:10:00
      nil,
      Arrow::Time.new(unit, 60 * 60 * 2 + 9), # 02:00:09
    ]
    assert_values_round_trip(values, Arrow::Time32Array.new(:second, values))
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    values = [
      Arrow::Time.new(unit, (60 * 10) * 1000 + 123), # 00:10:00.123
      nil,
      Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), # 02:00:09.987
    ]
    assert_values_round_trip(values, Arrow::Time32Array.new(:milli, values))
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    values = [
      # 00:10:00.123456
      Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
      nil,
      # 02:00:09.987654
      Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
    ]
    assert_values_round_trip(values, Arrow::Time64Array.new(:micro, values))
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    values = [
      # 00:10:00.123456789
      Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
      nil,
      # 02:00:09.987654321
      Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
    ]
    assert_values_round_trip(values, Arrow::Time64Array.new(:nano, values))
  end

  def test_decimal128
    values = [
      BigDecimal("92.92"),
      nil,
      BigDecimal("29.29"),
    ]
    data_type = Arrow::Decimal128DataType.new(8, 2)
    assert_values_round_trip(values,
                             Arrow::Decimal128Array.new(data_type, values))
  end

  def test_decimal256
    values = [
      BigDecimal("92.92"),
      nil,
      BigDecimal("29.29"),
    ]
    data_type = Arrow::Decimal256DataType.new(38, 2)
    assert_values_round_trip(values,
                             Arrow::Decimal256Array.new(data_type, values))
  end

  private

  # Passes +array+ through the including class's +build+ and asserts that
  # +#values+ on the result equals +expected+.
  def assert_values_round_trip(expected, array)
    assert_equal(expected, build(array).values)
  end
end

# Runs the shared cases directly against the Arrow array.
class ValuesArrayBasicArraysTest < Test::Unit::TestCase
  include ValuesBasicArraysTests

  def build(array)
    array
  end
end

# Runs the shared cases against an Arrow::ChunkedArray holding the array
# as its single chunk.
class ValuesChunkedArrayBasicArraysTest < Test::Unit::TestCase
  include ValuesBasicArraysTests

  def build(array)
    Arrow::ChunkedArray.new([array])
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/values/test-dense-union-array.rb b/src/arrow/ruby/red-arrow/test/values/test-dense-union-array.rb
# new file mode 100644
# index 000000000..465ffb9e6
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/values/test-dense-union-array.rb
# @@ -0,0 +1,482 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases for +#values+ on Arrow::DenseUnionArray.
#
# Every case describes a two-field dense union (fields "0" and "1", both
# of the type under test), builds it through the including class's
# +build(type, values)+ method, and asserts that +#values+ returns the
# Ruby hashes it was built from.
module ValuesDenseUnionArrayTests
  # Returns a dense union data type whose two fields, named "0" and "1",
  # share the same description.  +type+ is either a full field description
  # Hash or a bare type, which is stored under the :type key.
  def build_data_type(type, type_codes)
    field_description = type.is_a?(Hash) ? {}.merge(type) : {type: type}
    fields = ["0", "1"].map do |name|
      field_description.merge(name: name)
    end
    Arrow::DenseUnionDataType.new(fields: fields,
                                  type_codes: type_codes)
  end

  # Assembles an Arrow::DenseUnionArray from +values+, a list of
  # single-entry hashes keyed by field name ("0" or "1").
  def build_array(type, values)
    type_codes = [0, 1]
    data_type = build_data_type(type, type_codes)

    # One child array per union field, holding only that field's values.
    children = data_type.fields.map do |field|
      rows = values.compact
                   .select { |value| value.key?(field.name) }
                   .map { |value| [value[field.name]] }
      batch = Arrow::RecordBatch.new(Arrow::Schema.new([field]), rows)
      batch.columns[0].data
    end

    type_ids = []
    offsets = []
    values.each do |value|
      type_id =
        if value.key?("0")
          type_codes[0]
        elsif value.key?("1")
          type_codes[1]
        end
      # Values carrying neither key contribute no type id and no offset.
      next if type_id.nil?

      type_ids << type_id
      # Offset = how many earlier values already used this type id.
      offsets << (type_ids.count(type_id) - 1)
    end

    Arrow::DenseUnionArray.new(data_type,
                               Arrow::Int8Array.new(type_ids),
                               Arrow::Int32Array.new(offsets),
                               children)
  end

  def test_null
    assert_values_round_trip(:null, [{"0" => nil}])
  end

  def test_boolean
    assert_values_round_trip(:boolean, [{"0" => true}, {"1" => nil}])
  end

  def test_int8
    assert_values_round_trip(:int8, [{"0" => -(2 ** 7)}, {"1" => nil}])
  end

  def test_uint8
    assert_values_round_trip(:uint8, [{"0" => (2 ** 8) - 1}, {"1" => nil}])
  end

  def test_int16
    assert_values_round_trip(:int16, [{"0" => -(2 ** 15)}, {"1" => nil}])
  end

  def test_uint16
    assert_values_round_trip(:uint16, [{"0" => (2 ** 16) - 1}, {"1" => nil}])
  end

  def test_int32
    assert_values_round_trip(:int32, [{"0" => -(2 ** 31)}, {"1" => nil}])
  end

  def test_uint32
    assert_values_round_trip(:uint32, [{"0" => (2 ** 32) - 1}, {"1" => nil}])
  end

  def test_int64
    assert_values_round_trip(:int64, [{"0" => -(2 ** 63)}, {"1" => nil}])
  end

  def test_uint64
    assert_values_round_trip(:uint64, [{"0" => (2 ** 64) - 1}, {"1" => nil}])
  end

  def test_float
    assert_values_round_trip(:float, [{"0" => -1.0}, {"1" => nil}])
  end

  def test_double
    assert_values_round_trip(:double, [{"0" => -1.0}, {"1" => nil}])
  end

  def test_binary
    assert_values_round_trip(:binary, [{"0" => "\xff".b}, {"1" => nil}])
  end

  def test_string
    assert_values_round_trip(:string, [{"0" => "Ruby"}, {"1" => nil}])
  end

  def test_date32
    assert_values_round_trip(:date32,
                             [{"0" => Date.new(1960, 1, 1)}, {"1" => nil}])
  end

  def test_date64
    assert_values_round_trip(:date64,
                             [
                               {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
                               {"1" => nil},
                             ])
  end

  def test_timestamp_second
    values = [
      {"0" => Time.parse("1960-01-01T02:09:30Z")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :timestamp, unit: :second}, values)
  end

  def test_timestamp_milli
    values = [
      {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :timestamp, unit: :milli}, values)
  end

  def test_timestamp_micro
    values = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :timestamp, unit: :micro}, values)
  end

  def test_timestamp_nano
    values = [
      {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :timestamp, unit: :nano}, values)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    values = [
      # 00:10:00
      {"0" => Arrow::Time.new(unit, 60 * 10)},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :time32, unit: :second}, values)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    values = [
      # 00:10:00.123
      {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :time32, unit: :milli}, values)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    values = [
      # 00:10:00.123456
      {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :time64, unit: :micro}, values)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    values = [
      # 00:10:00.123456789
      {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :time64, unit: :nano}, values)
  end

  def test_decimal128
    values = [
      {"0" => BigDecimal("92.92")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :decimal128, precision: 8, scale: 2},
                             values)
  end

  def test_decimal256
    values = [
      {"0" => BigDecimal("92.92")},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :decimal256, precision: 38, scale: 2},
                             values)
  end

  def test_list
    values = [
      {"0" => [true, nil, false]},
      {"1" => nil},
    ]
    list_type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_values_round_trip(list_type, values)
  end

  def test_struct
    values = [
      {"0" => {"sub_field" => true}},
      {"1" => nil},
      {"0" => {"sub_field" => nil}},
    ]
    struct_type = {
      type: :struct,
      fields: [
        {name: :sub_field, type: :boolean},
      ],
    }
    assert_values_round_trip(struct_type, values)
  end

  def test_map
    values = [
      {"0" => {"key1" => true, "key2" => nil}},
      {"1" => nil},
    ]
    assert_values_round_trip({type: :map, key: :string, item: :boolean},
                             values)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    values = [
      {"0" => {"field1" => true}},
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    union_type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    values = [
      {"0" => {"field1" => true}},
      {"1" => nil},
      {"0" => {"field2" => nil}},
    ]
    union_type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    values = [
      {"0" => "Ruby"},
      {"1" => nil},
      {"0" => "GLib"},
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    dictionary_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_values_round_trip(dictionary_type, values)
  end

  private

  # Builds the target through the including class's +build+ and asserts
  # that +#values+ returns exactly the Ruby values it was built from.
  def assert_values_round_trip(type, values)
    assert_equal(values, build(type, values).values)
  end
end

# Runs the shared cases directly against the Arrow::DenseUnionArray.
class ValuesArrayDenseUnionArrayTest < Test::Unit::TestCase
  include ValuesDenseUnionArrayTests

  def build(type, values)
    build_array(type, values)
  end
end

# Runs the shared cases against an Arrow::ChunkedArray holding the union
# array as its single chunk.
class ValuesChunkedArrayDenseUnionArrayTest < Test::Unit::TestCase
  include ValuesDenseUnionArrayTests

  def build(type, values)
    Arrow::ChunkedArray.new([build_array(type, values)])
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/values/test-list-array.rb b/src/arrow/ruby/red-arrow/test/values/test-list-array.rb
# new file mode 100644
# index 000000000..d2905b36b
# --- /dev/null
# +++
# b/src/arrow/ruby/red-arrow/test/values/test-list-array.rb
# @@ -0,0 +1,532 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases for +#values+ on Arrow::ListArray.
#
# Every case builds a list array of the element type under test through
# the including class's +build(type, values)+ method and asserts that
# +#values+ returns the nested Ruby arrays it was built from.
module ValuesListArrayTests
  # Returns a list data type whose field is named :element.  +type+ is
  # either a field description Hash, merged over the name, or a bare type
  # stored under the :type key.
  def build_data_type(type)
    extra = type.is_a?(Hash) ? type : {type: type}
    Arrow::ListDataType.new(field: {name: :element}.merge(extra))
  end

  def build_array(type, values)
    data_type = build_data_type(type)
    Arrow::ListArray.new(data_type, values)
  end

  def test_null
    assert_values_round_trip(:null, [[nil, nil, nil], nil])
  end

  def test_boolean
    assert_values_round_trip(:boolean, [[true, nil, false], nil])
  end

  def test_int8
    assert_values_round_trip(:int8, [[-(2 ** 7), nil, (2 ** 7) - 1], nil])
  end

  def test_uint8
    assert_values_round_trip(:uint8, [[0, nil, (2 ** 8) - 1], nil])
  end

  def test_int16
    assert_values_round_trip(:int16, [[-(2 ** 15), nil, (2 ** 15) - 1], nil])
  end

  def test_uint16
    assert_values_round_trip(:uint16, [[0, nil, (2 ** 16) - 1], nil])
  end

  def test_int32
    assert_values_round_trip(:int32, [[-(2 ** 31), nil, (2 ** 31) - 1], nil])
  end

  def test_uint32
    assert_values_round_trip(:uint32, [[0, nil, (2 ** 32) - 1], nil])
  end

  def test_int64
    assert_values_round_trip(:int64, [[-(2 ** 63), nil, (2 ** 63) - 1], nil])
  end

  def test_uint64
    assert_values_round_trip(:uint64, [[0, nil, (2 ** 64) - 1], nil])
  end

  def test_float
    assert_values_round_trip(:float, [[-1.0, nil, 1.0], nil])
  end

  def test_double
    assert_values_round_trip(:double, [[-1.0, nil, 1.0], nil])
  end

  def test_binary
    assert_values_round_trip(:binary, [["\x00".b, nil, "\xff".b], nil])
  end

  def test_string
    values = [
      [
        "Ruby",
        nil,
        "\u3042", # U+3042 HIRAGANA LETTER A
      ],
      nil,
    ]
    assert_values_round_trip(:string, values)
  end

  def test_date32
    values = [
      [
        Date.new(1960, 1, 1),
        nil,
        Date.new(2017, 8, 23),
      ],
      nil,
    ]
    assert_values_round_trip(:date32, values)
  end

  def test_date64
    values = [
      [
        DateTime.new(1960, 1, 1, 2, 9, 30),
        nil,
        DateTime.new(2017, 8, 23, 14, 57, 2),
      ],
      nil,
    ]
    assert_values_round_trip(:date64, values)
  end

  def test_timestamp_second
    values = [
      [
        Time.parse("1960-01-01T02:09:30Z"),
        nil,
        Time.parse("2017-08-23T14:57:02Z"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :second}, values)
  end

  def test_timestamp_milli
    values = [
      [
        Time.parse("1960-01-01T02:09:30.123Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987Z"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :milli}, values)
  end

  def test_timestamp_micro
    values = [
      [
        Time.parse("1960-01-01T02:09:30.123456Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987654Z"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :micro}, values)
  end

  def test_timestamp_nano
    values = [
      [
        Time.parse("1960-01-01T02:09:30.123456789Z"),
        nil,
        Time.parse("2017-08-23T14:57:02.987654321Z"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :nano}, values)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    values = [
      [
        # 00:10:00
        Arrow::Time.new(unit, 60 * 10),
        nil,
        # 02:00:09
        Arrow::Time.new(unit, 60 * 60 * 2 + 9),
      ],
      nil,
    ]
    assert_values_round_trip({type: :time32, unit: :second}, values)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    values = [
      [
        # 00:10:00.123
        Arrow::Time.new(unit, (60 * 10) * 1000 + 123),
        nil,
        # 02:00:09.987
        Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987),
      ],
      nil,
    ]
    assert_values_round_trip({type: :time32, unit: :milli}, values)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    values = [
      [
        # 00:10:00.123456
        Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
        nil,
        # 02:00:09.987654
        Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
      ],
      nil,
    ]
    assert_values_round_trip({type: :time64, unit: :micro}, values)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    values = [
      [
        # 00:10:00.123456789
        Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
        nil,
        # 02:00:09.987654321
        Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
      ],
      nil,
    ]
    assert_values_round_trip({type: :time64, unit: :nano}, values)
  end

  def test_decimal128
    values = [
      [
        BigDecimal("92.92"),
        nil,
        BigDecimal("29.29"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :decimal128, precision: 8, scale: 2},
                             values)
  end

  def test_decimal256
    values = [
      [
        BigDecimal("92.92"),
        nil,
        BigDecimal("29.29"),
      ],
      nil,
    ]
    assert_values_round_trip({type: :decimal256, precision: 38, scale: 2},
                             values)
  end

  def test_list
    values = [
      [
        [true, nil],
        nil,
        [nil, false],
      ],
      nil,
    ]
    list_type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_values_round_trip(list_type, values)
  end

  def test_struct
    values = [
      [
        {"field" => true},
        nil,
        {"field" => nil},
      ],
      nil,
    ]
    struct_type = {
      type: :struct,
      fields: [
        {name: :field, type: :boolean},
      ],
    }
    assert_values_round_trip(struct_type, values)
  end

  def test_map
    values = [
      [
        {"key1" => true, "key2" => nil},
        nil,
      ],
      nil,
    ]
    assert_values_round_trip({type: :map, key: :string, item: :boolean},
                             values)
  end

  def test_sparse
    omit("Need to add support for SparseUnionArrayBuilder")
    values = [
      [
        {"field1" => true},
        nil,
        {"field2" => nil},
      ],
      nil,
    ]
    union_type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dense
    omit("Need to add support for DenseUnionArrayBuilder")
    values = [
      [
        {"field1" => true},
        nil,
        {"field2" => nil},
      ],
      nil,
    ]
    union_type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    values = [
      [
        "Ruby",
        nil,
        "GLib",
      ],
      nil,
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    dictionary_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_values_round_trip(dictionary_type, values)
  end

  private

  # Builds the target through the including class's +build+ and asserts
  # that +#values+ returns exactly the Ruby values it was built from.
  def assert_values_round_trip(type, values)
    assert_equal(values, build(type, values).values)
  end
end

# Runs the shared cases directly against the Arrow::ListArray.
class ValuesArrayListArrayTest < Test::Unit::TestCase
  include ValuesListArrayTests

  def build(type, values)
    build_array(type, values)
  end
end

# Runs the shared cases against an Arrow::ChunkedArray holding the list
# array as its single chunk.
class ValuesChunkedArrayListArrayTest < Test::Unit::TestCase
  include ValuesListArrayTests

  def build(type, values)
    Arrow::ChunkedArray.new([build_array(type, values)])
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/values/test-map-array.rb b/src/arrow/ruby/red-arrow/test/values/test-map-array.rb
# new file mode 100644
# index 000000000..14b5bf6c3
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/values/test-map-array.rb
# @@ -0,0 +1,433 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared test cases for +#values+ on Arrow::MapArray.
#
# Every case builds a string-keyed map array of the item type under test
# through the including class's +build(item_type, values)+ method and
# asserts that +#values+ returns the Ruby hashes it was built from.
module ValuesMapArrayTests
  # Returns a map data type with :string keys and +item_type+ items.
  def build_data_type(item_type)
    Arrow::MapDataType.new(key: :string, item: item_type)
  end

  def build_array(item_type, values)
    data_type = build_data_type(item_type)
    Arrow::MapArray.new(data_type, values)
  end

  def test_null
    assert_values_round_trip(:null, [{"key1" => nil}, nil])
  end

  def test_boolean
    assert_values_round_trip(:boolean,
                             [{"key1" => false, "key2" => nil}, nil])
  end

  def test_int8
    assert_values_round_trip(:int8,
                             [{"key1" => (2 ** 7) - 1, "key2" => nil}, nil])
  end

  def test_uint8
    assert_values_round_trip(:uint8,
                             [{"key1" => (2 ** 8) - 1, "key2" => nil}, nil])
  end

  def test_uint16
    assert_values_round_trip(:uint16,
                             [{"key1" => (2 ** 16) - 1, "key2" => nil}, nil])
  end

  def test_int32
    assert_values_round_trip(:int32,
                             [{"key1" => -(2 ** 31), "key2" => nil}, nil])
  end

  def test_uint32
    assert_values_round_trip(:uint32,
                             [{"key1" => (2 ** 32) - 1, "key2" => nil}, nil])
  end

  def test_int64
    assert_values_round_trip(:int64,
                             [{"key1" => -(2 ** 63), "key2" => nil}, nil])
  end

  def test_uint64
    assert_values_round_trip(:uint64,
                             [{"key1" => (2 ** 64) - 1, "key2" => nil}, nil])
  end

  def test_float
    assert_values_round_trip(:float, [{"key1" => -1.0, "key2" => nil}, nil])
  end

  def test_double
    assert_values_round_trip(:double, [{"key1" => -1.0, "key2" => nil}, nil])
  end

  def test_binary
    assert_values_round_trip(:binary,
                             [{"key1" => "\xff".b, "key2" => nil}, nil])
  end

  def test_string
    assert_values_round_trip(:string,
                             [{"key1" => "Ruby", "key2" => nil}, nil])
  end

  def test_date32
    values = [
      {"key1" => Date.new(1960, 1, 1), "key2" => nil},
      nil,
    ]
    assert_values_round_trip(:date32, values)
  end

  def test_date64
    values = [
      {"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil},
      nil,
    ]
    assert_values_round_trip(:date64, values)
  end

  def test_timestamp_second
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :second}, values)
  end

  def test_timestamp_milli
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :milli}, values)
  end

  def test_timestamp_micro
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :micro}, values)
  end

  def test_timestamp_nano
    values = [
      {"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :timestamp, unit: :nano}, values)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    values = [
      # 00:10:00
      {"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :time32, unit: :second}, values)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    values = [
      # 00:10:00.123
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :time32, unit: :milli}, values)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    values = [
      # 00:10:00.123456
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :time64, unit: :micro}, values)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    values = [
      # 00:10:00.123456789
      {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :time64, unit: :nano}, values)
  end

  def test_decimal128
    values = [
      {"key1" => BigDecimal("92.92"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :decimal128, precision: 8, scale: 2},
                             values)
  end

  def test_decimal256
    values = [
      {"key1" => BigDecimal("92.92"), "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :decimal256, precision: 38, scale: 2},
                             values)
  end

  def test_list
    values = [
      {"key1" => [true, nil, false], "key2" => nil},
      nil,
    ]
    list_type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_values_round_trip(list_type, values)
  end

  def test_struct
    values = [
      {"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}},
      nil,
    ]
    struct_type = {
      type: :struct,
      fields: [
        {name: :field, type: :boolean},
      ],
    }
    assert_values_round_trip(struct_type, values)
  end

  def test_map
    values = [
      {"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil},
      nil,
    ]
    assert_values_round_trip({type: :map, key: :string, item: :boolean},
                             values)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    values = [
      {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
      nil,
    ]
    union_type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    values = [
      {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
      nil,
    ]
    union_type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_values_round_trip(union_type, values)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    values = [
      {"key1" => "Ruby", "key2" => nil, "key3" => "GLib"},
      nil,
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    dictionary_type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: dictionary,
      ordered: true,
    }
    assert_values_round_trip(dictionary_type, values)
  end

  private

  # Builds the target through the including class's +build+ and asserts
  # that +#values+ returns exactly the Ruby values it was built from.
  def assert_values_round_trip(item_type, values)
    assert_equal(values, build(item_type, values).values)
  end
end

# Runs the shared cases directly against the Arrow::MapArray.
class ValuesArrayMapArrayTest < Test::Unit::TestCase
  include ValuesMapArrayTests

  def build(item_type, values)
    build_array(item_type, values)
  end
end

# Runs the shared cases against an Arrow::ChunkedArray holding the map
# array as its single chunk.
class ValuesChunkedArrayMapArrayTest < Test::Unit::TestCase
  include ValuesMapArrayTests

  def build(item_type, values)
    Arrow::ChunkedArray.new([build_array(item_type, values)])
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/values/test-sparse-union-array.rb b/src/arrow/ruby/red-arrow/test/values/test-sparse-union-array.rb
# new file mode 100644
# index 000000000..909d67e61
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/values/test-sparse-union-array.rb
# @@ -0,0 +1,473 @@
#
# Licensed to the Apache Software Foundation
(ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module ValuesSparseUnionArrayTests + def build_data_type(type, type_codes) + field_description = {} + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + Arrow::SparseUnionDataType.new(fields: [ + field_description.merge(name: "0"), + field_description.merge(name: "1"), + ], + type_codes: type_codes) + end + + def build_array(type, values) + type_codes = [0, 1] + data_type = build_data_type(type, type_codes) + type_ids = [] + arrays = data_type.fields.collect do |field| + sub_schema = Arrow::Schema.new([field]) + sub_records = values.collect do |value| + [value.nil? ? 
nil : value[field.name]] + end + sub_record_batch = Arrow::RecordBatch.new(sub_schema, + sub_records) + sub_record_batch.columns[0].data + end + values.each do |value| + if value.key?("0") + type_ids << type_codes[0] + elsif value.key?("1") + type_ids << type_codes[1] + end + end + Arrow::SparseUnionArray.new(data_type, + Arrow::Int8Array.new(type_ids), + arrays) + end + + def test_null + values = [ + {"0" => nil}, + ] + target = build(:null, values) + assert_equal(values, target.values) + end + + def test_boolean + values = [ + {"0" => true}, + {"1" => nil}, + ] + target = build(:boolean, values) + assert_equal(values, target.values) + end + + def test_int8 + values = [ + {"0" => -(2 ** 7)}, + {"1" => nil}, + ] + target = build(:int8, values) + assert_equal(values, target.values) + end + + def test_uint8 + values = [ + {"0" => (2 ** 8) - 1}, + {"1" => nil}, + ] + target = build(:uint8, values) + assert_equal(values, target.values) + end + + def test_int16 + values = [ + {"0" => -(2 ** 15)}, + {"1" => nil}, + ] + target = build(:int16, values) + assert_equal(values, target.values) + end + + def test_uint16 + values = [ + {"0" => (2 ** 16) - 1}, + {"1" => nil}, + ] + target = build(:uint16, values) + assert_equal(values, target.values) + end + + def test_int32 + values = [ + {"0" => -(2 ** 31)}, + {"1" => nil}, + ] + target = build(:int32, values) + assert_equal(values, target.values) + end + + def test_uint32 + values = [ + {"0" => (2 ** 32) - 1}, + {"1" => nil}, + ] + target = build(:uint32, values) + assert_equal(values, target.values) + end + + def test_int64 + values = [ + {"0" => -(2 ** 63)}, + {"1" => nil}, + ] + target = build(:int64, values) + assert_equal(values, target.values) + end + + def test_uint64 + values = [ + {"0" => (2 ** 64) - 1}, + {"1" => nil}, + ] + target = build(:uint64, values) + assert_equal(values, target.values) + end + + def test_float + values = [ + {"0" => -1.0}, + {"1" => nil}, + ] + target = build(:float, values) + 
assert_equal(values, target.values) + end + + def test_double + values = [ + {"0" => -1.0}, + {"1" => nil}, + ] + target = build(:double, values) + assert_equal(values, target.values) + end + + def test_binary + values = [ + {"0" => "\xff".b}, + {"1" => nil}, + ] + target = build(:binary, values) + assert_equal(values, target.values) + end + + def test_string + values = [ + {"0" => "Ruby"}, + {"1" => nil}, + ] + target = build(:string, values) + assert_equal(values, target.values) + end + + def test_date32 + values = [ + {"0" => Date.new(1960, 1, 1)}, + {"1" => nil}, + ] + target = build(:date32, values) + assert_equal(values, target.values) + end + + def test_date64 + values = [ + {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}, + {"1" => nil}, + ] + target = build(:date64, values) + assert_equal(values, target.values) + end + + def test_timestamp_second + values = [ + {"0" => Time.parse("1960-01-01T02:09:30Z")}, + {"1" => nil}, + ] + target = build({ + type: :timestamp, + unit: :second, + }, + values) + assert_equal(values, target.values) + end + + def test_timestamp_milli + values = [ + {"0" => Time.parse("1960-01-01T02:09:30.123Z")}, + {"1" => nil}, + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + values) + assert_equal(values, target.values) + end + + def test_timestamp_micro + values = [ + {"0" => Time.parse("1960-01-01T02:09:30.123456Z")}, + {"1" => nil}, + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + values) + assert_equal(values, target.values) + end + + def test_timestamp_nano + values = [ + {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}, + {"1" => nil}, + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + values) + assert_equal(values, target.values) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + values = [ + # 00:10:00 + {"0" => Arrow::Time.new(unit, 60 * 10)}, + {"1" => nil}, + ] + target = build({ + type: :time32, + unit: :second, + }, + values) + assert_equal(values, 
target.values) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + values = [ + # 00:10:00.123 + {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}, + {"1" => nil}, + ] + target = build({ + type: :time32, + unit: :milli, + }, + values) + assert_equal(values, target.values) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + values = [ + # 00:10:00.123456 + {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}, + {"1" => nil}, + ] + target = build({ + type: :time64, + unit: :micro, + }, + values) + assert_equal(values, target.values) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + values = [ + # 00:10:00.123456789 + {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}, + {"1" => nil}, + ] + target = build({ + type: :time64, + unit: :nano, + }, + values) + assert_equal(values, target.values) + end + + def test_decimal128 + values = [ + {"0" => BigDecimal("92.92")}, + {"1" => nil}, + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + values) + assert_equal(values, target.values) + end + + def test_decimal256 + values = [ + {"0" => BigDecimal("92.92")}, + {"1" => nil}, + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + values) + assert_equal(values, target.values) + end + + def test_list + values = [ + {"0" => [true, nil, false]}, + {"1" => nil}, + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + values) + assert_equal(values, target.values) + end + + def test_struct + values = [ + {"0" => {"sub_field" => true}}, + {"1" => nil}, + {"0" => {"sub_field" => nil}}, + ] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + values) + assert_equal(values, target.values) + end + + def test_map + values = [ + {"0" => {"key1" => true, "key2" => nil}}, + {"1" => nil}, + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + 
values) + assert_equal(values, target.values) + end + + def test_sparse_union + omit("Need to add support for SparseUnionArrayBuilder") + values = [ + {"0" => {"field1" => true}}, + {"1" => nil}, + {"0" => {"field2" => nil}}, + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + values) + assert_equal(values, target.values) + end + + def test_dense_union + omit("Need to add support for DenseUnionArrayBuilder") + values = [ + {"0" => {"field1" => true}}, + {"1" => nil}, + {"0" => {"field2" => nil}}, + ] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + values) + assert_equal(values, target.values) + end + + def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + values = [ + {"0" => "Ruby"}, + {"1" => nil}, + {"0" => "GLib"}, + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + values) + assert_equal(values, target.values) + end +end + +class ValuesArraySparseUnionArrayTest < Test::Unit::TestCase + include ValuesSparseUnionArrayTests + + def build(type, values) + build_array(type, values) + end +end + +class ValuesChunkedArraySparseUnionArrayTest < Test::Unit::TestCase + include ValuesSparseUnionArrayTests + + def build(type, values) + Arrow::ChunkedArray.new([build_array(type, values)]) + end +end diff --git a/src/arrow/ruby/red-arrow/test/values/test-struct-array.rb b/src/arrow/ruby/red-arrow/test/values/test-struct-array.rb new file mode 100644 index 000000000..4e3396796 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/values/test-struct-array.rb @@ -0,0 +1,482 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module ValuesStructArrayTests + def build_data_type(type) + field_description = { + name: :field, + } + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + Arrow::StructDataType.new([field_description]) + end + + def build_array(type, values) + Arrow::StructArray.new(build_data_type(type), values) + end + + def test_null + values = [ + {"field" => nil}, + nil, + ] + target = build(:null, values) + assert_equal(values, target.values) + end + + def test_boolean + values = [ + {"field" => true}, + nil, + {"field" => nil}, + ] + target = build(:boolean, values) + assert_equal(values, target.values) + end + + def test_int8 + values = [ + {"field" => -(2 ** 7)}, + nil, + {"field" => nil}, + ] + target = build(:int8, values) + assert_equal(values, target.values) + end + + def test_uint8 + values = [ + {"field" => (2 ** 8) - 1}, + nil, + {"field" => nil}, + ] + target = build(:uint8, values) + assert_equal(values, target.values) + end + + def test_int16 + values = [ + {"field" => -(2 ** 15)}, + nil, + {"field" => nil}, + ] + target = build(:int16, values) + assert_equal(values, target.values) + end + + def test_uint16 + values = [ + {"field" => (2 ** 16) - 1}, + nil, + {"field" => nil}, + ] + target = build(:uint16, values) + 
assert_equal(values, target.values) + end + + def test_int32 + values = [ + {"field" => -(2 ** 31)}, + nil, + {"field" => nil}, + ] + target = build(:int32, values) + assert_equal(values, target.values) + end + + def test_uint32 + values = [ + {"field" => (2 ** 32) - 1}, + nil, + {"field" => nil}, + ] + target = build(:uint32, values) + assert_equal(values, target.values) + end + + def test_int64 + values = [ + {"field" => -(2 ** 63)}, + nil, + {"field" => nil}, + ] + target = build(:int64, values) + assert_equal(values, target.values) + end + + def test_uint64 + values = [ + {"field" => (2 ** 64) - 1}, + nil, + {"field" => nil}, + ] + target = build(:uint64, values) + assert_equal(values, target.values) + end + + def test_float + values = [ + {"field" => -1.0}, + nil, + {"field" => nil}, + ] + target = build(:float, values) + assert_equal(values, target.values) + end + + def test_double + values = [ + {"field" => -1.0}, + nil, + {"field" => nil}, + ] + target = build(:double, values) + assert_equal(values, target.values) + end + + def test_binary + values = [ + {"field" => "\xff".b}, + nil, + {"field" => nil}, + ] + target = build(:binary, values) + assert_equal(values, target.values) + end + + def test_string + values = [ + {"field" => "Ruby"}, + nil, + {"field" => nil}, + ] + target = build(:string, values) + assert_equal(values, target.values) + end + + def test_date32 + values = [ + {"field" => Date.new(1960, 1, 1)}, + nil, + {"field" => nil}, + ] + target = build(:date32, values) + assert_equal(values, target.values) + end + + def test_date64 + values = [ + {"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}, + nil, + {"field" => nil}, + ] + target = build(:date64, values) + assert_equal(values, target.values) + end + + def test_timestamp_second + values = [ + {"field" => Time.parse("1960-01-01T02:09:30Z")}, + nil, + {"field" => nil}, + ] + target = build({ + type: :timestamp, + unit: :second, + }, + values) + assert_equal(values, target.values) + end + + def 
test_timestamp_milli + values = [ + {"field" => Time.parse("1960-01-01T02:09:30.123Z")}, + nil, + {"field" => nil}, + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + values) + assert_equal(values, target.values) + end + + def test_timestamp_micro + values = [ + {"field" => Time.parse("1960-01-01T02:09:30.123456Z")}, + nil, + {"field" => nil}, + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + values) + assert_equal(values, target.values) + end + + def test_timestamp_nano + values = [ + {"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}, + nil, + {"field" => nil}, + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + values) + assert_equal(values, target.values) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + values = [ + # 00:10:00 + {"field" => Arrow::Time.new(unit, 60 * 10)}, + nil, + {"field" => nil}, + ] + target = build({ + type: :time32, + unit: :second, + }, + values) + assert_equal(values, target.values) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + values = [ + # 00:10:00.123 + {"field" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}, + nil, + {"field" => nil}, + ] + target = build({ + type: :time32, + unit: :milli, + }, + values) + assert_equal(values, target.values) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + values = [ + # 00:10:00.123456 + {"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}, + nil, + {"field" => nil}, + ] + target = build({ + type: :time64, + unit: :micro, + }, + values) + assert_equal(values, target.values) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + values = [ + # 00:10:00.123456789 + {"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}, + nil, + {"field" => nil}, + ] + target = build({ + type: :time64, + unit: :nano, + }, + values) + assert_equal(values, target.values) + end + + def test_decimal128 + values = [ + {"field" => BigDecimal("92.92")}, + nil, + {"field" 
=> nil}, + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + values) + assert_equal(values, target.values) + end + + def test_decimal256 + values = [ + {"field" => BigDecimal("92.92")}, + nil, + {"field" => nil}, + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + values) + assert_equal(values, target.values) + end + + def test_list + values = [ + {"field" => [true, nil, false]}, + nil, + {"field" => nil}, + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + values) + assert_equal(values, target.values) + end + + def test_struct + values = [ + {"field" => {"sub_field" => true}}, + nil, + {"field" => nil}, + {"field" => {"sub_field" => nil}}, + ] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + values) + assert_equal(values, target.values) + end + + def test_map + values = [ + {"field" => {"key1" => true, "key2" => nil}}, + nil, + {"field" => nil}, + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + values) + assert_equal(values, target.values) + end + + def test_sparse_union + omit("Need to add support for SparseUnionArrayBuilder") + values = [ + {"field" => {"field1" => true}}, + nil, + {"field" => nil}, + {"field" => {"field2" => nil}}, + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + values) + assert_equal(values, target.values) + end + + def test_dense_union + omit("Need to add support for DenseUnionArrayBuilder") + values = [ + {"field" => {"field1" => true}}, + nil, + {"field" => nil}, + {"field" => {"field2" => nil}}, + ] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + values) + assert_equal(values, target.values) + end + + 
def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + values = [ + {"field" => "Ruby"}, + nil, + {"field" => nil}, + {"field" => "GLib"}, + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + values) + assert_equal(values, target.values) + end +end + +class ValuesArrayStructArrayTest < Test::Unit::TestCase + include ValuesStructArrayTests + + def build(type, values) + build_array(type, values) + end +end + +class ValuesChunkedArrayStructArrayTest < Test::Unit::TestCase + include ValuesStructArrayTests + + def build(type, values) + Arrow::ChunkedArray.new([build_array(type, values)]) + end +end |