diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-savable.rb | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-savable.rb')
-rw-r--r-- | src/arrow/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-savable.rb | 69 |
1 files changed, 69 insertions, 0 deletions
# Source: src/arrow/ruby/red-arrow-dataset/lib/arrow-dataset/arrow-table-savable.rb
# (new file, mode 100644, blob 30ad6c292)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module ArrowDataset
  # Mixin that adds a private #save_to_uri writer, built on the
  # ArrowDataset file-system APIs, to the class that includes it.
  #
  # The method reads three instance variables that are assigned
  # elsewhere (not visible in this file): @table, @output and @options.
  module ArrowTableSavable
    private

    # Writes @table to the location described by @output.
    #
    # * @options[:format] is resolved to a file format via FileFormat.resolve.
    # * When @output has no scheme, an Arrow::LocalFileSystem is used;
    #   otherwise the file system is created from @output.to_s.
    # * When @options[:partitioning] is truthy the table is written through
    #   FileSystemDataset.write_scanner; otherwise it is written to a single
    #   output stream opened at the target path.
    #
    # Returns whatever the selected write call returns.
    def save_to_uri
      file_format = FileFormat.resolve(@options[:format])
      write_options = FileSystemDatasetWriteOptions.new
      write_options.file_write_options = file_format.default_write_options
      target_path = @output.path

      has_scheme = !@output.scheme.nil?
      if has_scheme
        write_options.file_system = Arrow::FileSystem.create(@output.to_s)
        # /C:/... -> C:/...
        # Only applied when the expanded current directory does not start
        # with "/" (presumably a drive-letter platform).
        cwd_is_rooted = File.expand_path(".").start_with?("/")
        target_path = target_path.sub(/\A\//, "") unless cwd_is_rooted
      else
        write_options.file_system = Arrow::LocalFileSystem.new
      end

      partitioning = @options[:partitioning]
      if partitioning
        # TODO
        # The parent directory becomes the dataset base directory and the
        # final path component becomes the base name template.
        write_options.base_dir = File.dirname(target_path)
        write_options.base_name_template = File.basename(target_path)
        write_options.partitioning = Partitioning.resolve(partitioning)

        builder = ScannerBuilder.new(@table)
        builder.use_async(true)
        FileSystemDataset.write_scanner(builder.finish, write_options)
      else
        parent_dir = File.dirname(target_path)
        # NOTE(review): existence is checked with the local File API even
        # when the target file system was created from a URI -- confirm
        # this is intended.
        unless File.exist?(parent_dir)
          write_options.file_system.create_dir(parent_dir, true)
        end

        write_options.file_system.open_output_stream(target_path) do |stream|
          file_format.open_writer(stream,
                                  write_options.file_system,
                                  target_path,
                                  @table.schema,
                                  file_format.default_write_options) do |writer|
            writer.write_record_batch_reader(Arrow::TableBatchReader.new(@table))
          end
        end
      end
    end
  end
end

module Arrow
  # Mixes the dataset-based URI writer into the table saver.
  class TableSaver
    include ArrowDataset::ArrowTableSavable
  end
end