summaryrefslogtreecommitdiffstats
path: root/src/arrow/c_glib/test/parquet
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/test/parquet
parent Initial commit. (diff)
download ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/test/parquet')
-rw-r--r-- src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb 69
-rw-r--r-- src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb 46
-rw-r--r-- src/arrow/c_glib/test/parquet/test-writer-properties.rb 103
3 files changed, 218 insertions, 0 deletions
diff --git a/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb b/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb
new file mode 100644
index 000000000..45eb33596
--- /dev/null
+++ b/src/arrow/c_glib/test/parquet/test-arrow-file-reader.rb
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestParquetArrowFileReader < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ omit("Parquet is required") unless defined?(::Parquet)
+ @file = Tempfile.open(["data", ".parquet"])
+ @a_array = build_string_array(["foo", "bar"])
+ @b_array = build_int32_array([123, 456])
+ @table = build_table("a" => @a_array,
+ "b" => @b_array)
+ writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
+ chunk_size = 1
+ writer.write_table(@table, chunk_size)
+ writer.close
+ @reader = Parquet::ArrowFileReader.new(@file.path)
+ end
+
+ def test_schema
+ assert_equal(<<-SCHEMA.chomp, @reader.schema.to_s)
+a: string
+b: int32
+ SCHEMA
+ end
+
+ sub_test_case("#read_row_group") do
+ test("with column indices") do
+ assert_equal(build_table("b" => @b_array.slice(0, 1)),
+ @reader.read_row_group(0, [-1]))
+ end
+
+ test("without column indices") do
+ assert_equal(build_table("a" => @a_array.slice(1, 1),
+ "b" => @b_array.slice(1, 1)),
+ @reader.read_row_group(1))
+ end
+ end
+
+ def test_read_column
+ assert_equal([
+ Arrow::ChunkedArray.new([@a_array]),
+ Arrow::ChunkedArray.new([@b_array]),
+ ],
+ [
+ @reader.read_column_data(0),
+ @reader.read_column_data(-1),
+ ])
+ end
+
+ def test_n_rows
+ assert_equal(2, @reader.n_rows)
+ end
+end
diff --git a/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb b/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb
new file mode 100644
index 000000000..855527444
--- /dev/null
+++ b/src/arrow/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestParquetArrowFileWriter < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ omit("Parquet is required") unless defined?(::Parquet)
+ @file = Tempfile.open(["data", ".parquet"])
+ end
+
+ def test_write
+ enabled_values = [true, nil, false, true]
+ table = build_table("enabled" => build_boolean_array(enabled_values))
+ chunk_size = 2
+
+ writer = Parquet::ArrowFileWriter.new(table.schema, @file.path)
+ writer.write_table(table, chunk_size)
+ writer.close
+
+ reader = Parquet::ArrowFileReader.new(@file.path)
+ reader.use_threads = true
+ assert_equal([
+ enabled_values.length / chunk_size,
+ true,
+ ],
+ [
+ reader.n_row_groups,
+ table.equal_metadata(reader.read_table, false),
+ ])
+ end
+end
diff --git a/src/arrow/c_glib/test/parquet/test-writer-properties.rb b/src/arrow/c_glib/test/parquet/test-writer-properties.rb
new file mode 100644
index 000000000..1203a220b
--- /dev/null
+++ b/src/arrow/c_glib/test/parquet/test-writer-properties.rb
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestParquetWriterProperties < Test::Unit::TestCase
+ def setup
+ omit("Parquet is required") unless defined?(::Parquet)
+ @properties = Parquet::WriterProperties.new
+ end
+
+ def test_compression
+ @properties.set_compression(:gzip)
+ assert_equal(Arrow::CompressionType::GZIP,
+ @properties.get_compression_path("not-specified"))
+ end
+
+ def test_compression_with_path
+ @properties.set_compression(:gzip, "column")
+ assert_equal([
+ Arrow::CompressionType::GZIP,
+ Arrow::CompressionType::UNCOMPRESSED,
+ ],
+ [
+ @properties.get_compression_path("column"),
+ @properties.get_compression_path("not-specified"),
+ ])
+ end
+
+ def test_enable_dictionary
+ @properties.enable_dictionary
+ assert_equal(true,
+ @properties.dictionary_enabled?("not-specified"))
+ end
+
+ def test_enable_dictionary_with_path
+ @properties.disable_dictionary
+ @properties.enable_dictionary("column")
+ assert_equal([
+ true,
+ false,
+ ],
+ [
+ @properties.dictionary_enabled?("column"),
+ @properties.dictionary_enabled?("not-specified"),
+ ])
+ end
+
+ def test_disable_dictionary
+ @properties.disable_dictionary
+ assert_equal(false,
+ @properties.dictionary_enabled?("not-specified"))
+ end
+
+ def test_disable_dictionary_with_path
+ @properties.enable_dictionary
+ @properties.disable_dictionary("column")
+ assert_equal([
+ false,
+ true,
+ ],
+ [
+ @properties.dictionary_enabled?("column"),
+ @properties.dictionary_enabled?("not-specified"),
+ ])
+ end
+
+ def test_dictionary_page_size_limit
+ @properties.dictionary_page_size_limit = 4096
+ assert_equal(4096,
+ @properties.dictionary_page_size_limit)
+ end
+
+ def test_batch_size
+ @properties.batch_size = 100
+ assert_equal(100,
+ @properties.batch_size)
+ end
+
+ def test_data_page_size
+ @properties.data_page_size = 128
+ assert_equal(128,
+ @properties.data_page_size)
+ end
+
+ def test_max_row_group_length
+ @properties.max_row_group_length = 1024
+ assert_equal(1024,
+ @properties.max_row_group_length)
+ end
+end