summary refs log tree commit diff stats
path: root/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb 494
1 files changed, 494 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
new file mode 100644
index 000000000..8d94a77fe
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
@@ -0,0 +1,494 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module RawRecordsDenseUnionArrayTests
+ def build_schema(type, type_codes)
+ field_description = {}
+ if type.is_a?(Hash)
+ field_description = field_description.merge(type)
+ else
+ field_description[:type] = type
+ end
+ {
+ column: {
+ type: :dense_union,
+ fields: [
+ field_description.merge(name: "0"),
+ field_description.merge(name: "1"),
+ ],
+ type_codes: type_codes,
+ },
+ }
+ end
+
+ # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
+ def build_record_batch(type, records)
+ type_codes = [0, 1]
+ schema = Arrow::Schema.new(build_schema(type, type_codes))
+ type_ids = []
+ offsets = []
+ arrays = schema.fields[0].data_type.fields.collect do |field|
+ sub_schema = Arrow::Schema.new([field])
+ sub_records = []
+ records.each do |record|
+ column = record[0]
+ next if column.nil?
+ next unless column.key?(field.name)
+ sub_records << [column[field.name]]
+ end
+ sub_record_batch = Arrow::RecordBatch.new(sub_schema,
+ sub_records)
+ sub_record_batch.columns[0].data
+ end
+ records.each do |record|
+ column = record[0]
+ if column.key?("0")
+ type_id = type_codes[0]
+ type_ids << type_id
+ offsets << (type_ids.count(type_id) - 1)
+ elsif column.key?("1")
+ type_id = type_codes[1]
+ type_ids << type_id
+ offsets << (type_ids.count(type_id) - 1)
+ end
+ end
+ union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
+ Arrow::Int8Array.new(type_ids),
+ Arrow::Int32Array.new(offsets),
+ arrays)
+ schema = Arrow::Schema.new(column: union_array.value_data_type)
+ Arrow::RecordBatch.new(schema,
+ records.size,
+ [union_array])
+ end
+
+ def test_null
+ records = [
+ [{"0" => nil}],
+ ]
+ target = build(:null, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_boolean
+ records = [
+ [{"0" => true}],
+ [{"1" => nil}],
+ ]
+ target = build(:boolean, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int8
+ records = [
+ [{"0" => -(2 ** 7)}],
+ [{"1" => nil}],
+ ]
+ target = build(:int8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint8
+ records = [
+ [{"0" => (2 ** 8) - 1}],
+ [{"1" => nil}],
+ ]
+ target = build(:uint8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int16
+ records = [
+ [{"0" => -(2 ** 15)}],
+ [{"1" => nil}],
+ ]
+ target = build(:int16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint16
+ records = [
+ [{"0" => (2 ** 16) - 1}],
+ [{"1" => nil}],
+ ]
+ target = build(:uint16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int32
+ records = [
+ [{"0" => -(2 ** 31)}],
+ [{"1" => nil}],
+ ]
+ target = build(:int32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint32
+ records = [
+ [{"0" => (2 ** 32) - 1}],
+ [{"1" => nil}],
+ ]
+ target = build(:uint32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int64
+ records = [
+ [{"0" => -(2 ** 63)}],
+ [{"1" => nil}],
+ ]
+ target = build(:int64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint64
+ records = [
+ [{"0" => (2 ** 64) - 1}],
+ [{"1" => nil}],
+ ]
+ target = build(:uint64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_float
+ records = [
+ [{"0" => -1.0}],
+ [{"1" => nil}],
+ ]
+ target = build(:float, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_double
+ records = [
+ [{"0" => -1.0}],
+ [{"1" => nil}],
+ ]
+ target = build(:double, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_binary
+ records = [
+ [{"0" => "\xff".b}],
+ [{"1" => nil}],
+ ]
+ target = build(:binary, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_string
+ records = [
+ [{"0" => "Ruby"}],
+ [{"1" => nil}],
+ ]
+ target = build(:string, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date32
+ records = [
+ [{"0" => Date.new(1960, 1, 1)}],
+ [{"1" => nil}],
+ ]
+ target = build(:date32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date64
+ records = [
+ [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
+ [{"1" => nil}],
+ ]
+ target = build(:date64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_second
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_milli
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_micro
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_nano
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_second
+ unit = Arrow::TimeUnit::SECOND
+ records = [
+ # 00:10:00
+ [{"0" => Arrow::Time.new(unit, 60 * 10)}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time32,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_milli
+ unit = Arrow::TimeUnit::MILLI
+ records = [
+ # 00:10:00.123
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time32,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_micro
+ unit = Arrow::TimeUnit::MICRO
+ records = [
+ # 00:10:00.123456
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time64,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_nano
+ unit = Arrow::TimeUnit::NANO
+ records = [
+ # 00:10:00.123456789
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time64,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_decimal128
+ records = [
+ [{"0" => BigDecimal("92.92")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :decimal128,
+ precision: 8,
+ scale: 2,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_decimal256
+ records = [
+ [{"0" => BigDecimal("92.92")}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :decimal256,
+ precision: 38,
+ scale: 2,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_list
+ records = [
+ [{"0" => [true, nil, false]}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :list,
+ field: {
+ name: :sub_element,
+ type: :boolean,
+ },
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_struct
+ records = [
+ [{"0" => {"sub_field" => true}}],
+ [{"1" => nil}],
+ [{"0" => {"sub_field" => nil}}],
+ ]
+ target = build({
+ type: :struct,
+ fields: [
+ {
+ name: :sub_field,
+ type: :boolean,
+ },
+ ],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_map
+ records = [
+ [{"0" => {"key1" => true, "key2" => nil}}],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :map,
+ key: :string,
+ item: :boolean,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_sparse_union
+ omit("Need to add support for SparseUnionArrayBuilder")
+ records = [
+ [{"0" => {"field1" => true}}],
+ [{"1" => nil}],
+ [{"0" => {"field2" => nil}}],
+ ]
+ target = build({
+ type: :sparse_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dense_union
+ omit("Need to add support for DenseUnionArrayBuilder")
+ records = [
+ [{"0" => {"field1" => true}}],
+ [{"1" => nil}],
+ [{"0" => {"field2" => nil}}],
+ ]
+ target = build({
+ type: :dense_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dictionary
+ omit("Need to add support for DictionaryArrayBuilder")
+ records = [
+ [{"0" => "Ruby"}],
+ [{"1" => nil}],
+ [{"0" => "GLib"}],
+ ]
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
+ target = build({
+ type: :dictionary,
+ index_data_type: :int8,
+ dictionary: dictionary,
+ ordered: true,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+end
+
+class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
+ include RawRecordsDenseUnionArrayTests
+
+ def build(type, records)
+ build_record_batch(type, records)
+ end
+end
+
+class RawRecordsTableDenseUnionArrayTest < Test::Unit::TestCase
+ include RawRecordsDenseUnionArrayTests
+
+ def build(type, records)
+ build_record_batch(type, records).to_table
+ end
+end