diff options
Diffstat (limited to '')
-rw-r--r-- | src/arrow/ruby/red-parquet/.gitignore | 19 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/Gemfile | 24 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/LICENSE.txt | 202 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/NOTICE.txt | 2 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/README.md | 52 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/Rakefile | 41 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/dependency-check/Rakefile | 47 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet.rb | 29 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb | 36 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb | 52 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet/loader.rb | 46 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet/version.rb | 26 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb | 28 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/red-parquet.gemspec | 49 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/test/helper.rb | 22 | ||||
-rwxr-xr-x | src/arrow/ruby/red-parquet/test/run-test.rb | 50 | ||||
-rw-r--r-- | src/arrow/ruby/red-parquet/test/test-arrow-table.rb | 99 |
17 files changed, 824 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-parquet/.gitignore b/src/arrow/ruby/red-parquet/.gitignore new file mode 100644 index 000000000..afd93a168 --- /dev/null +++ b/src/arrow/ruby/red-parquet/.gitignore @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +/Gemfile.lock +/pkg/ diff --git a/src/arrow/ruby/red-parquet/Gemfile b/src/arrow/ruby/red-parquet/Gemfile new file mode 100644 index 000000000..7c4cefcf3 --- /dev/null +++ b/src/arrow/ruby/red-parquet/Gemfile @@ -0,0 +1,24 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +source "https://rubygems.org/" + +gemspec + +gem "red-arrow", path: "../red-arrow" diff --git a/src/arrow/ruby/red-parquet/LICENSE.txt b/src/arrow/ruby/red-parquet/LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/src/arrow/ruby/red-parquet/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/arrow/ruby/red-parquet/NOTICE.txt b/src/arrow/ruby/red-parquet/NOTICE.txt new file mode 100644 index 000000000..e08aeda8a --- /dev/null +++ b/src/arrow/ruby/red-parquet/NOTICE.txt @@ -0,0 +1,2 @@ +Apache Arrow +Copyright 2016 The Apache Software Foundation diff --git a/src/arrow/ruby/red-parquet/README.md b/src/arrow/ruby/red-parquet/README.md new file mode 100644 index 000000000..ff919c537 --- /dev/null +++ b/src/arrow/ruby/red-parquet/README.md @@ -0,0 +1,52 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Red Parquet - Apache Parquet Ruby + +Red Parquet is the Ruby bindings of Apache Parquet. 
Red Parquet is based on GObject Introspection. + +[Apache Parquet](https://parquet.apache.org/) is a columnar storage format. + +[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime. + +Red Parquet uses [Apache Parquet GLib](https://github.com/apache/arrow/tree/master/c_glib/parquet-glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Parquet. + +Apache Parquet GLib is a C wrapper for [Apache Parquet C++](https://github.com/apache/arrow/tree/master/cpp/src/parquet). GObject Introspection can't use Apache Parquet C++ directly. Apache Parquet GLib is a bridge between Apache Parquet C++ and GObject Introspection. + +gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Parquet uses GObject Introspection via gobject-introspection gem. + +## Install + +Install Apache Parquet GLib before install Red Parquet. See [Apache Arrow install document](https://arrow.apache.org/install/) for details. + +Install Red Parquet after you install Apache Parquet GLib: + +```text +% gem install red-parquet +``` + +## Usage + +```ruby +require "parquet" + +table = Arrow::Table.load("/dev/shm/data.parquet") +# Process data in table +table.save("/dev/shm/data-processed.parquet") +``` diff --git a/src/arrow/ruby/red-parquet/Rakefile b/src/arrow/ruby/red-parquet/Rakefile new file mode 100644 index 000000000..579b946d4 --- /dev/null +++ b/src/arrow/ruby/red-parquet/Rakefile @@ -0,0 +1,41 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "rubygems" +require "bundler/gem_helper" + +base_dir = File.join(__dir__) + +helper = Bundler::GemHelper.new(base_dir) +helper.install + +release_task = Rake::Task["release"] +release_task.prerequisites.replace(["build", "release:rubygem_push"]) + +desc "Run tests" +task :test do + cd(base_dir) do + cd("dependency-check") do + ruby("-S", "rake") + end + ruby("test/run-test.rb") + end +end + +task default: :test diff --git a/src/arrow/ruby/red-parquet/dependency-check/Rakefile b/src/arrow/ruby/red-parquet/dependency-check/Rakefile new file mode 100644 index 000000000..58420eea4 --- /dev/null +++ b/src/arrow/ruby/red-parquet/dependency-check/Rakefile @@ -0,0 +1,47 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +require "pkg-config" +require "native-package-installer" +require_relative "../lib/parquet/version" + +case RUBY_PLATFORM +when /mingw|mswin/ + task :default => "nothing" +else + task :default => "dependency:check" +end + +task :nothing do +end + +namespace :dependency do + desc "Check dependency" + task :check do + unless PKGConfig.check_version?("parquet-glib", + Parquet::Version::MAJOR, + Parquet::Version::MINOR, + Parquet::Version::MICRO) + unless NativePackageInstaller.install(:debian => "libparquet-glib-dev", + :redhat => "parquet-glib-devel") + exit(false) + end + end + end +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet.rb b/src/arrow/ruby/red-parquet/lib/parquet.rb new file mode 100644 index 000000000..81ae7d3ae --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet.rb @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require "arrow" + +require "parquet/version" + +require "parquet/loader" + +module Parquet + class Error < StandardError + end + + Loader.load +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb new file mode 100644 index 000000000..e3aa1ce0a --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module Parquet + module ArrowTableLoadable + private + def load_as_parquet + input = open_input_stream + reader = Parquet::ArrowFileReader.new(input) + reader.use_threads = (@options[:use_threads] != false) + table = reader.read_table + table.instance_variable_set(:@input, input) + table + end + end +end + +module Arrow + class TableLoader + include Parquet::ArrowTableLoadable + end +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb new file mode 100644 index 000000000..70c597527 --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Parquet + module ArrowTableSavable + private + def save_as_parquet + properties = WriterProperties.new + @options.each do |key, value| + next if value.nil? 
+ set_method_name = "set_#{key}" + next unless properties.respond_to?(set_method_name) + case value + when ::Array, ::Hash + value.each do |path, v| + properties.__send__(set_method_name, v, path) + end + else + properties.__send__(set_method_name, value) + end + end + chunk_size = @options[:chunk_size] || @table.n_rows + open_raw_output_stream do |output| + ArrowFileWriter.open(@table.schema, + output, + properties) do |writer| + writer.write_table(@table, chunk_size) + end + end + end + end +end + +module Arrow + class TableSaver + include Parquet::ArrowTableSavable + end +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet/loader.rb b/src/arrow/ruby/red-parquet/lib/parquet/loader.rb new file mode 100644 index 000000000..5e25872ff --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet/loader.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module Parquet + class Loader < GObjectIntrospection::Loader + class << self + def load + super("Parquet", Parquet) + end + end + + private + def post_load(repository, namespace) + require_libraries + end + + def require_libraries + require "parquet/arrow-table-loadable" + require "parquet/arrow-table-savable" + require "parquet/writer-properties" + end + + def load_object_info(info) + super + + klass = @base_module.const_get(rubyish_class_name(info)) + if klass.method_defined?(:close) + klass.extend(Arrow::BlockClosable) + end + end + end +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet/version.rb b/src/arrow/ruby/red-parquet/lib/parquet/version.rb new file mode 100644 index 000000000..8c9b41a36 --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet/version.rb @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module Parquet + VERSION = "6.0.1" + + module Version + numbers, TAG = VERSION.split("-") + MAJOR, MINOR, MICRO = numbers.split(".").collect(&:to_i) + STRING = VERSION + end +end diff --git a/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb b/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb new file mode 100644 index 000000000..5881471b4 --- /dev/null +++ b/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Parquet + class WriterProperties + def set_dictionary(enable, path=nil) + if enable + enable_dictionary(path) + else + disable_dictionary(path) + end + end + end +end diff --git a/src/arrow/ruby/red-parquet/red-parquet.gemspec b/src/arrow/ruby/red-parquet/red-parquet.gemspec new file mode 100644 index 000000000..dffafed19 --- /dev/null +++ b/src/arrow/ruby/red-parquet/red-parquet.gemspec @@ -0,0 +1,49 @@ +# -*- ruby -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require_relative "lib/parquet/version" + +Gem::Specification.new do |spec| + spec.name = "red-parquet" + version_components = [ + Parquet::Version::MAJOR.to_s, + Parquet::Version::MINOR.to_s, + Parquet::Version::MICRO.to_s, + Parquet::Version::TAG, + ] + spec.version = version_components.compact.join(".") + spec.homepage = "https://arrow.apache.org/" + spec.authors = ["Apache Arrow Developers"] + spec.email = ["dev@arrow.apache.org"] + + spec.summary = "Red Parquet is the Ruby bindings of Apache Parquet" + spec.description = "Apache Parquet is a columnar storage format." + spec.license = "Apache-2.0" + spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"] + spec.files += ["LICENSE.txt", "NOTICE.txt"] + spec.files += Dir.glob("lib/**/*.rb") + spec.test_files += Dir.glob("test/**/*") + spec.extensions = ["dependency-check/Rakefile"] + + spec.add_runtime_dependency("red-arrow", "= #{spec.version}") + + spec.add_development_dependency("bundler") + spec.add_development_dependency("rake") + spec.add_development_dependency("test-unit") +end diff --git a/src/arrow/ruby/red-parquet/test/helper.rb b/src/arrow/ruby/red-parquet/test/helper.rb new file mode 100644 index 000000000..169d1df42 --- /dev/null +++ b/src/arrow/ruby/red-parquet/test/helper.rb @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "parquet" + +require "tempfile" + +require "test-unit" diff --git a/src/arrow/ruby/red-parquet/test/run-test.rb b/src/arrow/ruby/red-parquet/test/run-test.rb new file mode 100755 index 000000000..48d2c49e1 --- /dev/null +++ b/src/arrow/ruby/red-parquet/test/run-test.rb @@ -0,0 +1,50 @@ +#!/usr/bin/env ruby +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Test runner script: puts the red-parquet and red-arrow directories on the
# load path, loads the shared test helper, and runs every test under test/.

$VERBOSE = true

require "pathname"

# ARROW_DLL_PATH is an optional, File::PATH_SEPARATOR-delimited list of
# directories; each entry is registered through RubyInstaller. When the
# variable is unset the list is empty and RubyInstaller is never referenced.
dll_paths = ENV["ARROW_DLL_PATH"] || ""
dll_paths.split(File::PATH_SEPARATOR).each do |dll_dir|
  RubyInstaller::Runtime.add_dll_directory(dll_dir)
end

base_dir = Pathname.new(__dir__).parent.expand_path
arrow_base_dir = base_dir.parent.join("red-arrow")

lib_dir = base_dir.join("lib")
test_dir = base_dir.join("test")

arrow_lib_dir = arrow_base_dir.join("lib")
arrow_ext_dir = arrow_base_dir.join("ext", "arrow")

# Use the red-arrow directory under BUILD_DIR when it is set; otherwise fall
# back to red-arrow's in-tree ext/arrow directory.
build_root = ENV["BUILD_DIR"]
arrow_build_dir =
  build_root ? Pathname.new(build_root).join("red-arrow") : arrow_ext_dir

# Resulting search order: red-parquet lib, red-arrow lib, red-arrow build dir.
$LOAD_PATH.unshift(lib_dir.to_s, arrow_lib_dir.to_s, arrow_build_dir.to_s)

require_relative "helper"

exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))

# diff --git a/src/arrow/ruby/red-parquet/test/test-arrow-table.rb b/src/arrow/ruby/red-parquet/test/test-arrow-table.rb
# new file mode 100644
# index 000000000..1ea2669e3
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/test/test-arrow-table.rb
# @@ -0,0 +1,99 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Save/load round-trip tests for Arrow::Table using the Parquet format.
#
# Each test saves the fixture table with a particular set of options, loads
# it back, and checks the result with `equal_metadata(loaded, false)`.
class TestArrowTable < Test::Unit::TestCase
  # Builds a three-column table (count: uint8, visible: boolean,
  # label: string) out of chunked arrays, opens a temporary ".parquet" file,
  # and yields it. The temporary file is closed and unlinked once the yield
  # returns or raises.
  # NOTE(review): relies on test-unit running the test body at the `yield`
  # inside `setup` -- confirm against the test-unit version in use.
  def setup
    @count_field = Arrow::Field.new("count", :uint8)
    @visible_field = Arrow::Field.new("visible", :boolean)
    @label_field = Arrow::Field.new("label", :string)
    fields = [@count_field, @visible_field, @label_field]
    schema = Arrow::Schema.new(fields)

    count_chunks = [
      [1, 2],
      [4, 8, 16],
      [32, 64],
      [128],
    ].map { |values| Arrow::UInt8Array.new(values) }
    visible_chunks = [
      [true, false, nil],
      [true],
      [true, false],
      [nil],
      [nil],
    ].map { |values| Arrow::BooleanArray.new(values) }
    label_chunks = [
      ["a"],
      ["b", "c"],
      ["d", nil, nil],
      ["e", "f"],
    ].map { |values| Arrow::StringArray.new(values) }

    @count_array = Arrow::ChunkedArray.new(count_chunks)
    @visible_array = Arrow::ChunkedArray.new(visible_chunks)
    @label_array = Arrow::ChunkedArray.new(label_chunks)
    columns = [@count_array, @visible_array, @label_array]
    @table = Arrow::Table.new(schema, columns)

    @output = Tempfile.open(["red-parquet", ".parquet"])
    begin
      yield(@output)
    ensure
      @output.close!
    end
  end

  # Round trip through a file path with default options.
  def test_save_load_path
    @table.save(@output.path)
    assert do
      loaded = Arrow::Table.load(@output.path)
      @table.equal_metadata(loaded, false)
    end
  end

  # Round trip through an in-memory buffer; the format is given explicitly
  # on both the save and the load call.
  def test_save_load_buffer
    buffer = Arrow::ResizableBuffer.new(1024)
    @table.save(buffer, format: :parquet)
    assert do
      loaded = Arrow::Table.load(buffer, format: :parquet)
      @table.equal_metadata(loaded, false)
    end
  end

  # Round trip with a single compression setting.
  def test_save_load_compression
    @table.save(@output.path, compression: :zstd)
    assert do
      loaded = Arrow::Table.load(@output.path)
      @table.equal_metadata(loaded, false)
    end
  end

  # Round trip with compression given as a hash keyed by "count".
  def test_save_load_compression_path
    @table.save(@output.path, compression: {"count" => :zstd})
    assert do
      loaded = Arrow::Table.load(@output.path)
      @table.equal_metadata(loaded, false)
    end
  end

  # Round trip with `dictionary: false`.
  def test_save_load_dictionary
    @table.save(@output.path, dictionary: false)
    assert do
      loaded = Arrow::Table.load(@output.path)
      @table.equal_metadata(loaded, false)
    end
  end

  # Round trip with dictionary given as a list of ["label", false] pairs.
  def test_save_load_dictionary_path
    @table.save(@output.path, dictionary: [["label", false]])
    assert do
      loaded = Arrow::Table.load(@output.path)
      @table.equal_metadata(loaded, false)
    end
  end
end