diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/test/parquet | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/test/parquet')
3 files changed, 218 insertions, 0 deletions
diff --git a/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb b/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb new file mode 100644 index 000000000..45eb33596 --- /dev/null +++ b/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 

# Tests for Parquet::ArrowFileReader.
#
# Each test starts from a temporary Parquet file holding a two-row table
# ("a": string, "b": int32) written with a chunk size of 1; the row-group
# tests below expect one row in each of row groups 0 and 1.
class TestParquetArrowFileReader < Test::Unit::TestCase
  include Helper::Buildable

  # Builds the fixture table, writes it to a temporary file and opens a
  # reader on that file. The test is omitted when the Parquet bindings are
  # not available.
  def setup
    omit("Parquet is required") unless defined?(::Parquet)
    @file = Tempfile.open(["data", ".parquet"])
    @a_array = build_string_array(["foo", "bar"])
    @b_array = build_int32_array([123, 456])
    @table = build_table("a" => @a_array,
                         "b" => @b_array)
    writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
    chunk_size = 1
    writer.write_table(@table, chunk_size)
    writer.close
    @reader = Parquet::ArrowFileReader.new(@file.path)
  end

  # Closes and unlinks the temporary file instead of leaving it to the
  # garbage collector. @file is nil when setup omitted the test before
  # creating it, hence the guard.
  def teardown
    @file.close! if @file
  end

  # The reader reports the schema of the table that was written.
  def test_schema
    assert_equal(<<-SCHEMA.chomp, @reader.schema.to_s)
a: string
b: int32
    SCHEMA
  end

  sub_test_case("#read_row_group") do
    # Column index -1 is expected to select the last column ("b").
    test("with column indices") do
      assert_equal(build_table("b" => @b_array.slice(0, 1)),
                   @reader.read_row_group(0, [-1]))
    end

    # Without indices, every column of the requested row group is read.
    test("without column indices") do
      assert_equal(build_table("a" => @a_array.slice(1, 1),
                               "b" => @b_array.slice(1, 1)),
                   @reader.read_row_group(1))
    end
  end

  # Columns can be addressed by a non-negative index or by -1 (expected to
  # be the last column, "b").
  def test_read_column
    assert_equal([
                   Arrow::ChunkedArray.new([@a_array]),
                   Arrow::ChunkedArray.new([@b_array]),
                 ],
                 [
                   @reader.read_column_data(0),
                   @reader.read_column_data(-1),
                 ])
  end

  # The fixture table has two rows.
  def test_n_rows
    assert_equal(2, @reader.n_rows)
  end
end

# diff --git a/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb b/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb
# new file mode 100644
# index 000000000..855527444
# --- /dev/null
# +++ b/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb
# @@ -0,0 +1,46 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Parquet::ArrowFileWriter: writes a table to a temporary file
# and reads it back with Parquet::ArrowFileReader.
class TestParquetArrowFileWriter < Test::Unit::TestCase
  include Helper::Buildable

  # Creates the temporary output file. The test is omitted when the Parquet
  # bindings are not available.
  def setup
    omit("Parquet is required") unless defined?(::Parquet)
    @file = Tempfile.open(["data", ".parquet"])
  end

  # Closes and unlinks the temporary file instead of leaving it to the
  # garbage collector. @file is nil when setup omitted the test before
  # creating it, hence the guard.
  def teardown
    @file.close! if @file
  end

  # Writes four boolean values (including a nil) with a chunk size of 2 and
  # checks that the file has length / chunk_size row groups and that the
  # table read back equals the one written.
  def test_write
    enabled_values = [true, nil, false, true]
    table = build_table("enabled" => build_boolean_array(enabled_values))
    chunk_size = 2

    writer = Parquet::ArrowFileWriter.new(table.schema, @file.path)
    writer.write_table(table, chunk_size)
    writer.close

    reader = Parquet::ArrowFileReader.new(@file.path)
    reader.use_threads = true
    # NOTE(review): the `false` passed to equal_metadata presumably turns
    # off metadata comparison -- confirm against the Arrow GLib API docs.
    assert_equal([
                   enabled_values.length / chunk_size,
                   true,
                 ],
                 [
                   reader.n_row_groups,
                   table.equal_metadata(reader.read_table, false),
                 ])
  end
end

# diff --git a/src/arrow/c_glib/test/parquet/test-writer-properties.rb b/src/arrow/c_glib/test/parquet/test-writer-properties.rb
# new file mode 100644
# index 000000000..1203a220b
# --- /dev/null
# +++ b/src/arrow/c_glib/test/parquet/test-writer-properties.rb
# @@ -0,0 +1,103 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Parquet::WriterProperties: compression and dictionary settings
# (globally and per column path) plus the numeric size/length accessors.
class TestParquetWriterProperties < Test::Unit::TestCase
  # Creates a fresh properties object for every test; the test is omitted
  # when the Parquet bindings are not available.
  def setup
    if defined?(::Parquet)
      @properties = Parquet::WriterProperties.new
    else
      omit("Parquet is required")
    end
  end

  # A compression set without a path is reported for an arbitrary path.
  def test_compression
    @properties.set_compression(:gzip)
    expected = Arrow::CompressionType::GZIP
    actual = @properties.get_compression_path("not-specified")
    assert_equal(expected, actual)
  end

  # A compression set for one path is reported for that path only.
  def test_compression_with_path
    @properties.set_compression(:gzip, "column")
    expected = [
      Arrow::CompressionType::GZIP,
      Arrow::CompressionType::UNCOMPRESSED,
    ]
    assert_equal(expected, compression_types("column", "not-specified"))
  end

  # Enabling the dictionary without a path applies to an arbitrary path.
  def test_enable_dictionary
    @properties.enable_dictionary
    enabled = @properties.dictionary_enabled?("not-specified")
    assert_equal(true, enabled)
  end

  # Enabling the dictionary for one path leaves other paths disabled.
  def test_enable_dictionary_with_path
    @properties.disable_dictionary
    @properties.enable_dictionary("column")
    expected = [true, false]
    assert_equal(expected, dictionary_flags("column", "not-specified"))
  end

  # Disabling the dictionary without a path applies to an arbitrary path.
  def test_disable_dictionary
    @properties.disable_dictionary
    enabled = @properties.dictionary_enabled?("not-specified")
    assert_equal(false, enabled)
  end

  # Disabling the dictionary for one path leaves other paths enabled.
  def test_disable_dictionary_with_path
    @properties.enable_dictionary
    @properties.disable_dictionary("column")
    expected = [false, true]
    assert_equal(expected, dictionary_flags("column", "not-specified"))
  end

  # The dictionary page size limit reads back the value assigned to it.
  def test_dictionary_page_size_limit
    limit = 4096
    @properties.dictionary_page_size_limit = limit
    assert_equal(limit, @properties.dictionary_page_size_limit)
  end

  # The batch size reads back the value assigned to it.
  def test_batch_size
    size = 100
    @properties.batch_size = size
    assert_equal(size, @properties.batch_size)
  end

  # The data page size reads back the value assigned to it.
  def test_data_page_size
    size = 128
    @properties.data_page_size = size
    assert_equal(size, @properties.data_page_size)
  end

  # The maximum row group length reads back the value assigned to it.
  def test_max_row_group_length
    length = 1024
    @properties.max_row_group_length = length
    assert_equal(length, @properties.max_row_group_length)
  end

  private

  # Compression types reported for each of the given paths, in order.
  def compression_types(*paths)
    paths.map { |path| @properties.get_compression_path(path) }
  end

  # Dictionary-enabled flags reported for each of the given paths, in order.
  def dictionary_flags(*paths)
    paths.map { |path| @properties.dictionary_enabled?(path) }
  end
end