diff options
Diffstat (limited to 'src/arrow/ruby/red-parquet/lib/parquet')
5 files changed, 188 insertions, 0 deletions
# diff --git a/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb
# new file mode 100644
# index 000000000..e3aa1ce0a
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb
# @@ -0,0 +1,36 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Parquet
  # Mixin that adds Parquet loading to a table loader.
  #
  # The including class must provide `open_input_stream` and an `@options`
  # Hash; both are used here but defined elsewhere.
  module ArrowTableLoadable
    private
    # Reads the whole Parquet input into a table and returns it.
    #
    # Threads are enabled unless `@options[:use_threads]` is exactly
    # `false`, so a missing or nil option enables them.
    #
    # The opened input is stored on the returned table as `@input`, which
    # keeps a reference to it together with the table.
    def load_as_parquet
      input = open_input_stream
      begin
        reader = Parquet::ArrowFileReader.new(input)
        reader.use_threads = (@options[:use_threads] != false)
        table = reader.read_table
      rescue => error
        # Nothing will own the input when loading fails, so close it here
        # instead of leaving it open until it is garbage collected.
        begin
          input.close if input.respond_to?(:close)
        rescue StandardError
          # A failing close must not hide the original error.
        end
        raise error
      end
      table.instance_variable_set(:@input, input)
      table
    end
  end
end

module Arrow
  class TableLoader
    include Parquet::ArrowTableLoadable
  end
end

# diff --git a/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb
# new file mode 100644
# index 000000000..70c597527
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/lib/parquet/arrow-table-savable.rb
# @@ -0,0 +1,52 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Parquet
  # Mixin that adds Parquet saving to a table saver.
  #
  # The including class must provide `@options` (a Hash), `@table` and
  # `open_raw_output_stream`; all are used here but defined elsewhere.
  module ArrowTableSavable
    private
    # Writes `@table` as Parquet to the output yielded by
    # `open_raw_output_stream`.
    #
    # Rows are written with a chunk size of `@options[:chunk_size]`. When
    # that option is missing or nil, the table's row count is used.
    def save_as_parquet
      properties = build_parquet_writer_properties
      chunk_size = @options[:chunk_size] || @table.n_rows
      open_raw_output_stream do |output|
        ArrowFileWriter.open(@table.schema,
                             output,
                             properties) do |writer|
          writer.write_table(@table, chunk_size)
        end
      end
    end

    # Builds a WriterProperties object from `@options`.
    #
    # An option `key` is applied through `set_<key>` when its value is not
    # nil and the properties object responds to that method; every other
    # option is skipped. An Array or Hash value is iterated as
    # (path, value) pairs and applied as `set_<key>(value, path)` once per
    # pair. Any other value is applied as `set_<key>(value)`.
    def build_parquet_writer_properties
      properties = WriterProperties.new
      @options.each do |key, value|
        next if value.nil?
        setter = "set_#{key}"
        next unless properties.respond_to?(setter)
        case value
        when ::Array, ::Hash
          value.each do |path, path_value|
            properties.__send__(setter, path_value, path)
          end
        else
          properties.__send__(setter, value)
        end
      end
      properties
    end
  end
end

module Arrow
  class TableSaver
    include Parquet::ArrowTableSavable
  end
end

# diff --git a/src/arrow/ruby/red-parquet/lib/parquet/loader.rb b/src/arrow/ruby/red-parquet/lib/parquet/loader.rb
# new file mode 100644
# index 000000000..5e25872ff
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/lib/parquet/loader.rb
# @@ -0,0 +1,46 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Parquet
  # GObject Introspection loader for the "Parquet" namespace.
  class Loader < GObjectIntrospection::Loader
    class << self
      # Asks the superclass loader to load the "Parquet" namespace with the
      # Parquet module as its base module.
      def load
        super("Parquet", Parquet)
      end
    end

    private
    # Requires the Ruby-side extension files. Neither argument is used.
    # NOTE(review): presumably called by the base loader once the namespace
    # has been loaded -- confirm against GObjectIntrospection::Loader.
    def post_load(repository, namespace)
      require_libraries
    end

    # Requires the files that extend the loaded classes.
    def require_libraries
      require "parquet/arrow-table-loadable"
      require "parquet/arrow-table-savable"
      require "parquet/writer-properties"
    end

    # After the superclass has handled `info`, looks up the class named
    # `rubyish_class_name(info)` in `@base_module` and extends it with
    # Arrow::BlockClosable when it defines an instance method `close`.
    def load_object_info(info)
      super

      klass = @base_module.const_get(rubyish_class_name(info))
      klass.extend(Arrow::BlockClosable) if klass.method_defined?(:close)
    end
  end
end

# diff --git a/src/arrow/ruby/red-parquet/lib/parquet/version.rb b/src/arrow/ruby/red-parquet/lib/parquet/version.rb
# new file mode 100644
# index 000000000..8c9b41a36
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/lib/parquet/version.rb
# @@ -0,0 +1,26 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

module Parquet
  # Full version string; an optional tag follows a "-" separator.
  VERSION = "6.0.1"

  # Components of VERSION.
  module Version
    # TAG is the part after "-", or nil when VERSION has no "-".
    numbers, TAG = VERSION.split("-")
    # The dot-separated numeric parts, converted to integers.
    MAJOR, MINOR, MICRO = numbers.split(".").collect(&:to_i)
    # Same string as VERSION.
    STRING = VERSION
  end
end

# diff --git a/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb b/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb
# new file mode 100644
# index 000000000..5881471b4
# --- /dev/null
# +++ b/src/arrow/ruby/red-parquet/lib/parquet/writer-properties.rb
# @@ -0,0 +1,28 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Parquet
  class WriterProperties
    # Single setter in front of `enable_dictionary` / `disable_dictionary`.
    #
    # Calls `enable_dictionary(path)` when `enable` is truthy and
    # `disable_dictionary(path)` otherwise. `path` defaults to nil and is
    # passed through unchanged.
    def set_dictionary(enable, path=nil)
      if enable
        enable_dictionary(path)
      else
        disable_dictionary(path)
      end
    end
  end
end