diff options
Diffstat (limited to 'src/arrow/c_glib/test/dataset/test-file-system-dataset.rb')
-rw-r--r-- | src/arrow/c_glib/test/dataset/test-file-system-dataset.rb | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb new file mode 100644 index 000000000..1aef38fcc --- /dev/null +++ b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDatasetFileSystemDataset < Test::Unit::TestCase + include Helper::Buildable + include Helper::Readable + + def setup + omit("Arrow Dataset is required") unless defined?(ArrowDataset) + Dir.mktmpdir do |tmpdir| + @dir = tmpdir + @format = ArrowDataset::IPCFileFormat.new + @factory = ArrowDataset::FileSystemDatasetFactory.new(@format) + @file_system = Arrow::LocalFileSystem.new + @factory.file_system = @file_system + partitioning_schema = build_schema(label: Arrow::StringDataType.new) + @partitioning = + ArrowDataset::DirectoryPartitioning.new(partitioning_schema) + @factory.partitioning = @partitioning + yield + end + end + + def test_type_name + dataset = @factory.finish + assert_equal("filesystem", dataset.type_name) + end + + def test_format + dataset = @factory.finish + assert_equal(@format, dataset.format) + end + + def test_file_system + dataset = @factory.finish + assert_equal(@file_system, dataset.file_system) + end + + def test_partitioning + dataset = @factory.finish + assert_equal(@partitioning, dataset.partitioning) + end + + def test_read_write + table = build_table(label: build_string_array(["a", "a", "b", "c"]), + count: build_int32_array([1, 10, 2, 3])) + table_reader = Arrow::TableBatchReader.new(table) + scanner_builder = ArrowDataset::ScannerBuilder.new(table_reader) + scanner_builder.use_async = true + scanner = scanner_builder.finish + options = ArrowDataset::FileSystemDatasetWriteOptions.new + options.file_write_options = @format.default_write_options + options.file_system = Arrow::LocalFileSystem.new + options.base_dir = @dir + options.base_name_template = "{i}.arrow" + options.partitioning = @partitioning + ArrowDataset::FileSystemDataset.write_scanner(scanner, options) + Find.find(@dir) do |path| + @factory.add_path(path) if File.file?(path) + end + @factory.partition_base_dir = @dir + dataset = @factory.finish + assert_equal(build_table(count: [ + build_int32_array([1, 10]), + build_int32_array([2]), + build_int32_array([3]), + ], + label: [ + build_string_array(["a", "a"]), + build_string_array(["b"]), + build_string_array(["c"]), + ]), + dataset.to_table) + end +end |