1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises ArrowDataset::FileSystemDataset: the accessors exposed by a
# dataset produced from a FileSystemDatasetFactory, and a write/read round
# trip through a directory-partitioned layout on the local file system.
class TestDatasetFileSystemDataset < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Readable

  # Prepares an IPC-format factory with a local file system and a directory
  # partitioning keyed on a string "label" field. The method yields from
  # inside the Dir.mktmpdir block, so whatever runs at the yield sees @dir
  # while the temporary directory is still present.
  def setup
    unless defined?(ArrowDataset)
      omit("Arrow Dataset is required")
    end

    Dir.mktmpdir do |tmpdir|
      @dir = tmpdir

      @format = ArrowDataset::IPCFileFormat.new
      @factory = ArrowDataset::FileSystemDatasetFactory.new(@format)

      @file_system = Arrow::LocalFileSystem.new
      @factory.file_system = @file_system

      label_schema = build_schema(label: Arrow::StringDataType.new)
      @partitioning = ArrowDataset::DirectoryPartitioning.new(label_schema)
      @factory.partitioning = @partitioning

      yield
    end
  end

  # The finished dataset reports "filesystem" as its type name.
  def test_type_name
    assert_equal("filesystem", @factory.finish.type_name)
  end

  # The finished dataset exposes the format the factory was created with.
  def test_format
    assert_equal(@format, @factory.finish.format)
  end

  # The finished dataset exposes the file system assigned to the factory.
  def test_file_system
    assert_equal(@file_system, @factory.finish.file_system)
  end

  # The finished dataset exposes the partitioning assigned to the factory.
  def test_partitioning
    assert_equal(@partitioning, @factory.finish.partitioning)
  end

  # Writes a four-row table partitioned by "label" under @dir, registers
  # every written file with the factory, and checks that the table read
  # back has one chunk per label value with "count" ahead of "label".
  def test_read_write
    source_table = build_table(label: build_string_array(["a", "a", "b", "c"]),
                               count: build_int32_array([1, 10, 2, 3]))

    # Scan the in-memory table asynchronously to feed the writer.
    builder = ArrowDataset::ScannerBuilder.new(
      Arrow::TableBatchReader.new(source_table)
    )
    builder.use_async = true
    scanner = builder.finish

    write_options = ArrowDataset::FileSystemDatasetWriteOptions.new.tap do |opts|
      opts.file_write_options = @format.default_write_options
      opts.file_system = Arrow::LocalFileSystem.new
      opts.base_dir = @dir
      opts.base_name_template = "{i}.arrow"
      opts.partitioning = @partitioning
    end
    ArrowDataset::FileSystemDataset.write_scanner(scanner, write_options)

    # Only regular files are dataset fragments; directories are skipped.
    Find.find(@dir) do |entry|
      next unless File.file?(entry)

      @factory.add_path(entry)
    end
    @factory.partition_base_dir = @dir
    dataset = @factory.finish

    expected_counts = [
      build_int32_array([1, 10]),
      build_int32_array([2]),
      build_int32_array([3]),
    ]
    expected_labels = [
      build_string_array(["a", "a"]),
      build_string_array(["b"]),
      build_string_array(["c"]),
    ]
    expected_table = build_table(count: expected_counts,
                                 label: expected_labels)

    assert_equal(expected_table, dataset.to_table)
  end
end
|