summaryrefslogtreecommitdiffstats
path: root/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/c_glib/test/dataset/test-file-system-dataset.rb')
-rw-r--r--src/arrow/c_glib/test/dataset/test-file-system-dataset.rb89
1 files changed, 89 insertions, 0 deletions
diff --git a/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb
new file mode 100644
index 000000000..1aef38fcc
--- /dev/null
+++ b/src/arrow/c_glib/test/dataset/test-file-system-dataset.rb
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDatasetFileSystemDataset < Test::Unit::TestCase
+ include Helper::Buildable
+ include Helper::Readable
+
+ def setup
+ omit("Arrow Dataset is required") unless defined?(ArrowDataset)
+ Dir.mktmpdir do |tmpdir|
+ @dir = tmpdir
+ @format = ArrowDataset::IPCFileFormat.new
+ @factory = ArrowDataset::FileSystemDatasetFactory.new(@format)
+ @file_system = Arrow::LocalFileSystem.new
+ @factory.file_system = @file_system
+ partitioning_schema = build_schema(label: Arrow::StringDataType.new)
+ @partitioning =
+ ArrowDataset::DirectoryPartitioning.new(partitioning_schema)
+ @factory.partitioning = @partitioning
+ yield
+ end
+ end
+
+ def test_type_name
+ dataset = @factory.finish
+ assert_equal("filesystem", dataset.type_name)
+ end
+
+ def test_format
+ dataset = @factory.finish
+ assert_equal(@format, dataset.format)
+ end
+
+ def test_file_system
+ dataset = @factory.finish
+ assert_equal(@file_system, dataset.file_system)
+ end
+
+ def test_partitioning
+ dataset = @factory.finish
+ assert_equal(@partitioning, dataset.partitioning)
+ end
+
+ def test_read_write
+ table = build_table(label: build_string_array(["a", "a", "b", "c"]),
+ count: build_int32_array([1, 10, 2, 3]))
+ table_reader = Arrow::TableBatchReader.new(table)
+ scanner_builder = ArrowDataset::ScannerBuilder.new(table_reader)
+ scanner_builder.use_async = true
+ scanner = scanner_builder.finish
+ options = ArrowDataset::FileSystemDatasetWriteOptions.new
+ options.file_write_options = @format.default_write_options
+ options.file_system = Arrow::LocalFileSystem.new
+ options.base_dir = @dir
+ options.base_name_template = "{i}.arrow"
+ options.partitioning = @partitioning
+ ArrowDataset::FileSystemDataset.write_scanner(scanner, options)
+ Find.find(@dir) do |path|
+ @factory.add_path(path) if File.file?(path)
+ end
+ @factory.partition_base_dir = @dir
+ dataset = @factory.finish
+ assert_equal(build_table(count: [
+ build_int32_array([1, 10]),
+ build_int32_array([2]),
+ build_int32_array([3]),
+ ],
+ label: [
+ build_string_array(["a", "a"]),
+ build_string_array(["b"]),
+ build_string_array(["c"]),
+ ]),
+ dataset.to_table)
+ end
+end