diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/ruby/red-arrow/test/raw-records | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2. (ref: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/ruby/red-arrow/test/raw-records')
8 files changed, 2952 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb new file mode 100644 index 000000000..c80020666 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-basic-arrays.rb @@ -0,0 +1,365 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module RawRecordsBasicArraysTests + def test_null + records = [ + [nil], + [nil], + [nil], + [nil], + ] + target = build({column: :null}, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [true], + [nil], + [false], + ] + target = build({column: :boolean}, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [-(2 ** 7)], + [nil], + [(2 ** 7) - 1], + ] + target = build({column: :int8}, records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [0], + [nil], + [(2 ** 8) - 1], + ] + target = build({column: :uint8}, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [-(2 ** 15)], + [nil], + [(2 ** 15) - 1], + ] + target = build({column: :int16}, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [0], + [nil], + [(2 ** 16) - 1], + ] + target = build({column: :uint16}, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [-(2 ** 31)], + [nil], + [(2 ** 31) - 1], + ] + target = build({column: :int32}, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [0], + [nil], + [(2 ** 32) - 1], + ] + target = build({column: :uint32}, records) + assert_equal(records, target.raw_records) + end + + def test_int64 + records = [ + [-(2 ** 63)], + [nil], + [(2 ** 63) - 1], + ] + target = build({column: :int64}, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [0], + [nil], + [(2 ** 64) - 1], + ] + target = build({column: :uint64}, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [-1.0], + [nil], + [1.0], + ] + target = build({column: :float}, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [-1.0], + [nil], + [1.0], + ] + target = build({column: :double}, records) 
+ assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + ["\x00".b], + [nil], + ["\xff".b], + ] + target = build({column: :binary}, records) + assert_equal(records, target.raw_records) + end + + def test_string + records = [ + ["Ruby"], + [nil], + ["\u3042"], # U+3042 HIRAGANA LETTER A + ] + target = build({column: :string}, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [Date.new(1960, 1, 1)], + [nil], + [Date.new(2017, 8, 23)], + ] + target = build({column: :date32}, records) + assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [DateTime.new(1960, 1, 1, 2, 9, 30)], + [nil], + [DateTime.new(2017, 8, 23, 14, 57, 2)], + ] + target = build({column: :date64}, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [Time.parse("1960-01-01T02:09:30Z")], + [nil], + [Time.parse("2017-08-23T14:57:02Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :second, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [Time.parse("1960-01-01T02:09:30.123Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :milli, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_micro + records = [ + [Time.parse("1960-01-01T02:09:30.123456Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :micro, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [Time.parse("1960-01-01T02:09:30.123456789Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654321Z")], + ] + target = build({ + column: { + type: :timestamp, + unit: :nano, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + 
unit = Arrow::TimeUnit::SECOND + records = [ + [Arrow::Time.new(unit, 60 * 10)], # 00:10:00 + [nil], + [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 + ] + target = build({ + column: { + type: :time32, + unit: :second, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123 + [nil], + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 + ] + target = build({ + column: { + type: :time32, + unit: :milli, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)], + [nil], + # 02:00:09.987654 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], + ] + target = build({ + column: { + type: :time64, + unit: :micro, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)], + [nil], + # 02:00:09.987654321 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], + ] + target = build({ + column: { + type: :time64, + unit: :nano, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + target = build({ + column: { + type: :decimal128, + precision: 8, + scale: 2, + } + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + target = build({ + column: { + type: :decimal256, + precision: 38, + scale: 2, + } + }, + records) + assert_equal(records, target.raw_records) + end +end + +class 
RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase + include RawRecordsBasicArraysTests + + def build(schema, records) + Arrow::RecordBatch.new(schema, records) + end +end + +class RawRecordsTableBasicArraysTest < Test::Unit::TestCase + include RawRecordsBasicArraysTests + + def build(schema, records) + Arrow::Table.new(schema, records) + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb new file mode 100644 index 000000000..8d94a77fe --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-dense-union-array.rb @@ -0,0 +1,494 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module RawRecordsDenseUnionArrayTests + def build_schema(type, type_codes) + field_description = {} + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :dense_union, + fields: [ + field_description.merge(name: "0"), + field_description.merge(name: "1"), + ], + type_codes: type_codes, + }, + } + end + + # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records) + def build_record_batch(type, records) + type_codes = [0, 1] + schema = Arrow::Schema.new(build_schema(type, type_codes)) + type_ids = [] + offsets = [] + arrays = schema.fields[0].data_type.fields.collect do |field| + sub_schema = Arrow::Schema.new([field]) + sub_records = [] + records.each do |record| + column = record[0] + next if column.nil? + next unless column.key?(field.name) + sub_records << [column[field.name]] + end + sub_record_batch = Arrow::RecordBatch.new(sub_schema, + sub_records) + sub_record_batch.columns[0].data + end + records.each do |record| + column = record[0] + if column.key?("0") + type_id = type_codes[0] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + elsif column.key?("1") + type_id = type_codes[1] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + end + end + union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), + Arrow::Int32Array.new(offsets), + arrays) + schema = Arrow::Schema.new(column: union_array.value_data_type) + Arrow::RecordBatch.new(schema, + records.size, + [union_array]) + end + + def test_null + records = [ + [{"0" => nil}], + ] + target = build(:null, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [{"0" => true}], + [{"1" => nil}], + ] + target = build(:boolean, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [{"0" => -(2 ** 7)}], + [{"1" => nil}], + ] + target = build(:int8, 
records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [{"0" => (2 ** 8) - 1}], + [{"1" => nil}], + ] + target = build(:uint8, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [{"0" => -(2 ** 15)}], + [{"1" => nil}], + ] + target = build(:int16, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [{"0" => (2 ** 16) - 1}], + [{"1" => nil}], + ] + target = build(:uint16, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [{"0" => -(2 ** 31)}], + [{"1" => nil}], + ] + target = build(:int32, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [{"0" => (2 ** 32) - 1}], + [{"1" => nil}], + ] + target = build(:uint32, records) + assert_equal(records, target.raw_records) + end + + def test_int64 + records = [ + [{"0" => -(2 ** 63)}], + [{"1" => nil}], + ] + target = build(:int64, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [{"0" => (2 ** 64) - 1}], + [{"1" => nil}], + ] + target = build(:uint64, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + target = build(:float, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + target = build(:double, records) + assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + [{"0" => "\xff".b}], + [{"1" => nil}], + ] + target = build(:binary, records) + assert_equal(records, target.raw_records) + end + + def test_string + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + ] + target = build(:string, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [{"0" => Date.new(1960, 1, 1)}], + [{"1" => nil}], + ] + target = build(:date32, records) + 
assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], + [{"1" => nil}], + ] + target = build(:date64, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_micro + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], + [{"1" => nil}], + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"0" => Arrow::Time.new(unit, 60 * 10)}], + [{"1" => nil}], + ] + target = build({ + type: :time32, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], + [{"1" => nil}], + ] + target = build({ + type: :time32, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], + [{"1" => nil}], + ] + target = build({ + type: :time64, + unit: :micro, + 
}, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], + [{"1" => nil}], + ] + target = build({ + type: :time64, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_list + records = [ + [{"0" => [true, nil, false]}], + [{"1" => nil}], + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_struct + records = [ + [{"0" => {"sub_field" => true}}], + [{"1" => nil}], + [{"0" => {"sub_field" => nil}}], + ] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_map + records = [ + [{"0" => {"key1" => true, "key2" => nil}}], + [{"1" => nil}], + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_sparse_union + omit("Need to add support for SparseUnionArrayBuilder") + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => nil}}], + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, 
target.raw_records) + end + + def test_dense_union + omit("Need to add support for DenseUnionArrayBuilder") + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => nil}}], + ] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + [{"0" => "GLib"}], + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + records) + assert_equal(records, target.raw_records) + end +end + +class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase + include RawRecordsDenseUnionArrayTests + + def build(type, records) + build_record_batch(type, records) + end +end + +class RawRecordsTableDenseUnionArrayTest < Test::Unit::TestCase + include RawRecordsDenseUnionArrayTests + + def build(type, records) + build_record_batch(type, records).to_table + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb new file mode 100644 index 000000000..6d7d4c079 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-list-array.rb @@ -0,0 +1,571 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module RawRecordsListArrayTests + def build_schema(type) + field_description = { + name: :element, + } + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :list, + field: field_description, + }, + } + end + + def test_null + records = [ + [[nil, nil, nil]], + [nil], + ] + target = build(:null, records) + assert_equal(records, target.raw_records) + end + + def test_boolean + records = [ + [[true, nil, false]], + [nil], + ] + target = build(:boolean, records) + assert_equal(records, target.raw_records) + end + + def test_int8 + records = [ + [[-(2 ** 7), nil, (2 ** 7) - 1]], + [nil], + ] + target = build(:int8, records) + assert_equal(records, target.raw_records) + end + + def test_uint8 + records = [ + [[0, nil, (2 ** 8) - 1]], + [nil], + ] + target = build(:uint8, records) + assert_equal(records, target.raw_records) + end + + def test_int16 + records = [ + [[-(2 ** 15), nil, (2 ** 15) - 1]], + [nil], + ] + target = build(:int16, records) + assert_equal(records, target.raw_records) + end + + def test_uint16 + records = [ + [[0, nil, (2 ** 16) - 1]], + [nil], + ] + target = build(:uint16, records) + assert_equal(records, target.raw_records) + end + + def test_int32 + records = [ + [[-(2 ** 31), nil, (2 ** 31) - 1]], + [nil], + ] + target = build(:int32, records) + assert_equal(records, target.raw_records) + end + + def test_uint32 + records = [ + [[0, nil, (2 ** 32) - 1]], + [nil], + ] + target = build(:uint32, records) + assert_equal(records, target.raw_records) + 
end + + def test_int64 + records = [ + [[-(2 ** 63), nil, (2 ** 63) - 1]], + [nil], + ] + target = build(:int64, records) + assert_equal(records, target.raw_records) + end + + def test_uint64 + records = [ + [[0, nil, (2 ** 64) - 1]], + [nil], + ] + target = build(:uint64, records) + assert_equal(records, target.raw_records) + end + + def test_float + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + target = build(:float, records) + assert_equal(records, target.raw_records) + end + + def test_double + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + target = build(:double, records) + assert_equal(records, target.raw_records) + end + + def test_binary + records = [ + [["\x00".b, nil, "\xff".b]], + [nil], + ] + target = build(:binary, records) + assert_equal(records, target.raw_records) + end + + def test_string + records = [ + [ + [ + "Ruby", + nil, + "\u3042", # U+3042 HIRAGANA LETTER A + ], + ], + [nil], + ] + target = build(:string, records) + assert_equal(records, target.raw_records) + end + + def test_date32 + records = [ + [ + [ + Date.new(1960, 1, 1), + nil, + Date.new(2017, 8, 23), + ], + ], + [nil], + ] + target = build(:date32, records) + assert_equal(records, target.raw_records) + end + + def test_date64 + records = [ + [ + [ + DateTime.new(1960, 1, 1, 2, 9, 30), + nil, + DateTime.new(2017, 8, 23, 14, 57, 2), + ], + ], + [nil], + ] + target = build(:date64, records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_second + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30Z"), + nil, + Time.parse("2017-08-23T14:57:02Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_milli + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123Z"), + nil, + Time.parse("2017-08-23T14:57:02.987Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + assert_equal(records, 
target.raw_records) + end + + def test_timestamp_micro + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_timestamp_nano + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456789Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654321Z"), + ], + ], + [nil], + ] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + [ + [ + # 00:10:00 + Arrow::Time.new(unit, 60 * 10), + nil, + # 02:00:09 + Arrow::Time.new(unit, 60 * 60 * 2 + 9), + ], + ], + [nil], + ] + target = build({ + type: :time32, + unit: :second, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [ + [ + # 00:10:00.123 + Arrow::Time.new(unit, (60 * 10) * 1000 + 123), + nil, + # 02:00:09.987 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), + ], + ], + [nil], + ] + target = build({ + type: :time32, + unit: :milli, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + [ + [ + # 00:10:00.123456 + Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), + nil, + # 02:00:09.987654 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654), + ], + ], + [nil], + ] + target = build({ + type: :time64, + unit: :micro, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + [ + [ + # 00:10:00.123456789 + Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), + nil, + # 02:00:09.987654321 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321), + ], + ], + [nil], + 
] + target = build({ + type: :time64, + unit: :nano, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal128 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_decimal256 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_list + records = [ + [ + [ + [ + true, + nil, + ], + nil, + [ + nil, + false, + ], + ], + ], + [nil], + ] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_struct + records = [ + [ + [ + {"field" => true}, + nil, + {"field" => nil}, + ], + ], + [nil], + ] + target = build({ + type: :struct, + fields: [ + { + name: :field, + type: :boolean, + }, + ], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_map + records = [ + [ + [ + {"key1" => true, "key2" => nil}, + nil, + ], + ], + [nil], + ] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + assert_equal(records, target.raw_records) + end + + def test_sparse_union + omit("Need to add support for SparseUnionArrayBuilder") + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => nil}, + ], + ], + [nil], + ] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dense_union + omit("Need to add support for DenseUnionArrayBuilder") + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => nil}, + ], + ], + [nil], + ] + 
target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + assert_equal(records, target.raw_records) + end + + def test_dictionary + omit("Need to add support for DictionaryArrayBuilder") + records = [ + [ + [ + "Ruby", + nil, + "GLib", + ], + ], + [nil], + ] + dictionary = Arrow::StringArray.new(["GLib", "Ruby"]) + target = build({ + type: :dictionary, + index_data_type: :int8, + dictionary: dictionary, + ordered: true, + }, + records) + assert_equal(records, target.raw_records) + end +end + +class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase + include RawRecordsListArrayTests + + def build(type, records) + Arrow::RecordBatch.new(build_schema(type), records) + end +end + +class RawRecordsTableListArrayTest < Test::Unit::TestCase + include RawRecordsListArrayTests + + def build(type, records) + Arrow::Table.new(build_schema(type), records) + end +end diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb new file mode 100644 index 000000000..c5abb7d77 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-map-array.rb @@ -0,0 +1,441 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
# See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared raw_records round-trip tests for a map column with string keys.
#
# Including classes must provide build(type, records), which wraps
# build_schema(type) and the records in the container under test.
module RawRecordsMapArrayTests
  # Returns a schema description with a single map column named :column
  # whose keys are strings and whose items have the given type.
  #
  # type is either a type symbol (e.g. :int8) or a Hash describing a
  # parameterized type (e.g. {type: :timestamp, unit: :second}).
  def build_schema(type)
    {
      column: {
        type: :map,
        key: :string,
        item: type
      },
    }
  end

  def test_null
    records = [
      [{"key1" => nil}],
      [nil],
    ]
    assert_equal(records, build(:null, records).raw_records)
  end

  def test_boolean
    records = [
      [{"key1" => true, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:boolean, records).raw_records)
  end

  def test_int8
    records = [
      [{"key1" => -(2 ** 7), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:int8, records).raw_records)
  end

  def test_uint8
    records = [
      [{"key1" => (2 ** 8) - 1, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:uint8, records).raw_records)
  end

  def test_int16
    records = [
      [{"key1" => -(2 ** 15), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:int16, records).raw_records)
  end

  def test_uint16
    records = [
      [{"key1" => (2 ** 16) - 1, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:uint16, records).raw_records)
  end

  def test_int32
    records = [
      [{"key1" => -(2 ** 31), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:int32, records).raw_records)
  end

  def test_uint32
    records = [
      [{"key1" => (2 ** 32) - 1, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:uint32, records).raw_records)
  end

  def test_int64
    records = [
      [{"key1" => -(2 ** 63), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:int64, records).raw_records)
  end

  def test_uint64
    records = [
      [{"key1" => (2 ** 64) - 1, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:uint64, records).raw_records)
  end

  def test_float
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:float, records).raw_records)
  end

  def test_double
    records = [
      [{"key1" => -1.0, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:double, records).raw_records)
  end

  def test_binary
    records = [
      [{"key1" => "\xff".b, "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:binary, records).raw_records)
  end

  def test_string
    records = [
      [{"key1" => "Ruby", "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:string, records).raw_records)
  end

  def test_date32
    records = [
      [{"key1" => Date.new(1960, 1, 1), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:date32, records).raw_records)
  end

  def test_date64
    records = [
      [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}],
      [nil],
    ]
    assert_equal(records, build(:date64, records).raw_records)
  end

  def test_timestamp_second
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :timestamp, unit: :second}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_milli
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :timestamp, unit: :milli}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_micro
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :timestamp, unit: :micro}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_nano
    records = [
      [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :timestamp, unit: :nano}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}],
      [nil],
    ]
    target = build({type: :time32, unit: :second}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}],
      [nil],
    ]
    target = build({type: :time32, unit: :milli}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}],
      [nil],
    ]
    target = build({type: :time64, unit: :micro}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}],
      [nil],
    ]
    target = build({type: :time64, unit: :nano}, records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal128
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :decimal128, precision: 8, scale: 2}, records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal256
    records = [
      [{"key1" => BigDecimal("92.92"), "key2" => nil}],
      [nil],
    ]
    target = build({type: :decimal256, precision: 38, scale: 2}, records)
    assert_equal(records, target.raw_records)
  end

  def test_list
    records = [
      [{"key1" => [true, nil, false], "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_struct
    records = [
      [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}],
      [nil],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_map
    records = [
      [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}],
      [nil],
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    # Each map item is a one-entry hash keyed by a union field name
    # declared below (field1/field2), mirroring test_dense_union.
    records = [
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}}],
      [nil],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
      [nil],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end
end

# Runs the shared map tests against Arrow::RecordBatch.
class RawRecordsRecordBatchMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  def build(type, records)
    Arrow::RecordBatch.new(build_schema(type), records)
  end
end

# Runs the shared map tests against Arrow::Table.
class RawRecordsTableMapArrayTest < Test::Unit::TestCase
  include RawRecordsMapArrayTests

  def build(type, records)
    Arrow::Table.new(build_schema(type), records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
# new file mode 100644
# index 000000000..50dff67ce
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
# @@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Shared raw_records round-trip tests for containers with several columns.
#
# Including classes must provide build(schema, records).
module RawRecordsMultipleColumnsTests
  def test_3_elements
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    expected = [
      [true, nil, "Ruby"],
      [nil, 0, "GLib"],
      [false, 2 ** 8 - 1, nil],
    ]
    assert_equal(expected, build(schema, expected).raw_records)
  end

  def test_4_elements
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
      {name: :column3, type: :int64},
    ]
    expected = [
      [true, nil, "Ruby", -(2 ** 63)],
      [nil, 0, "GLib", nil],
      [false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
    ]
    assert_equal(expected, build(schema, expected).raw_records)
  end
end

# Runs the shared multiple-column tests against Arrow::RecordBatch.
class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
  include RawRecordsMultipleColumnsTests

  def build(schema, records)
    Arrow::RecordBatch.new(schema, records)
  end
end

# Runs the shared multiple-column tests against Arrow::Table.
class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase
  include RawRecordsMultipleColumnsTests

  def build(schema, records)
    Arrow::Table.new(schema, records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
# new file mode 100644
# index 000000000..415401216
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
# @@ -0,0 +1,484 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Shared raw_records round-trip tests for a sparse union column.
#
# Every record is a one-element row whose value is a Hash with a single
# key, "0" or "1", selecting which union field carries the value.
# Including classes must provide build(type, records).
module RawRecordsSparseUnionArrayTests
  # Returns a schema description with one sparse union column named
  # :column. The union has two fields, "0" and "1", both of the given
  # type (a type symbol or a Hash describing a parameterized type).
  def build_schema(type, type_codes)
    field_description = {}
    if type.is_a?(Hash)
      field_description = field_description.merge(type)
    else
      field_description[:type] = type
    end
    {
      column: {
        type: :sparse_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Builds a one-column record batch holding a sparse union array.
  #
  # One child array is built per union field from the per-field value of
  # every record, and one type id is emitted per record according to the
  # key ("0" or "1") present in its hash.
  #
  # Raises ArgumentError when a record's value has neither key: skipping
  # it would leave the type ids shorter than the child arrays.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = records.collect do |record|
        [record[0].nil? ? nil : record[0][field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema,
                                                sub_records)
      sub_record_batch.columns[0].data
    end
    # Exactly one type id per record keeps the ids aligned with the rows.
    type_ids = records.collect do |record|
      column = record[0]
      if column&.key?("0")
        type_codes[0]
      elsif column&.key?("1")
        type_codes[1]
      else
        raise ArgumentError,
              "sparse union record must have a \"0\" or \"1\" key: " +
              record.inspect
      end
    end
    union_array = Arrow::SparseUnionArray.new(union_data_type,
                                              Arrow::Int8Array.new(type_ids),
                                              arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    assert_equal(records, build(:null, records).raw_records)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:boolean, records).raw_records)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:int8, records).raw_records)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:uint8, records).raw_records)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:int16, records).raw_records)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:uint16, records).raw_records)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:int32, records).raw_records)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:uint32, records).raw_records)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:int64, records).raw_records)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:uint64, records).raw_records)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:float, records).raw_records)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:double, records).raw_records)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:binary, records).raw_records)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:string, records).raw_records)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:date32, records).raw_records)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    assert_equal(records, build(:date64, records).raw_records)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    target = build({type: :timestamp, unit: :second}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    target = build({type: :timestamp, unit: :milli}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    target = build({type: :timestamp, unit: :micro}, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    target = build({type: :timestamp, unit: :nano}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    target = build({type: :time32, unit: :second}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    target = build({type: :time32, unit: :milli}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    target = build({type: :time64, unit: :micro}, records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    target = build({type: :time64, unit: :nano}, records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({type: :decimal128, precision: 8, scale: 2}, records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({type: :decimal256, precision: 38, scale: 2}, records)
    assert_equal(records, target.raw_records)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :sub_field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_map
    records = [
      [{"0" => {"key1" => true, "key2" => nil}}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :map,
                     key: :string,
                     item: :boolean,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end
end

# Runs the shared sparse union tests against a record batch.
class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  def build(type, records)
    build_record_batch(type, records)
  end
end

# Runs the shared sparse union tests against a table made from the
# record batch.
class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsSparseUnionArrayTests

  def build(type, records)
    build_record_batch(type, records).to_table
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb
# new file mode 100644
# index 000000000..6c01facf8
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-struct-array.rb
# @@ -0,0 +1,485 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Shared raw_records round-trip tests for a struct column with a single
# member named "field".
#
# Including classes must provide build(type, records).
module RawRecordsStructArrayTests
  # Returns a schema description with one struct column named :column
  # whose only member, :field, has the given type (a type symbol or a
  # Hash describing a parameterized type).
  def build_schema(type)
    type_description = type.is_a?(Hash) ? type : {type: type}
    {
      column: {
        type: :struct,
        fields: [
          {name: :field}.merge(type_description),
        ],
      },
    }
  end

  def test_null
    expected = [
      [{"field" => nil}],
      [nil],
    ]
    assert_equal(expected, build(:null, expected).raw_records)
  end

  def test_boolean
    expected = [
      [{"field" => true}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:boolean, expected).raw_records)
  end

  def test_int8
    expected = [
      [{"field" => -(2 ** 7)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:int8, expected).raw_records)
  end

  def test_uint8
    expected = [
      [{"field" => (2 ** 8) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:uint8, expected).raw_records)
  end

  def test_int16
    expected = [
      [{"field" => -(2 ** 15)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:int16, expected).raw_records)
  end

  def test_uint16
    expected = [
      [{"field" => (2 ** 16) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:uint16, expected).raw_records)
  end

  def test_int32
    expected = [
      [{"field" => -(2 ** 31)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:int32, expected).raw_records)
  end

  def test_uint32
    expected = [
      [{"field" => (2 ** 32) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:uint32, expected).raw_records)
  end

  def test_int64
    expected = [
      [{"field" => -(2 ** 63)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:int64, expected).raw_records)
  end

  def test_uint64
    expected = [
      [{"field" => (2 ** 64) - 1}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:uint64, expected).raw_records)
  end

  def test_float
    expected = [
      [{"field" => -1.0}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:float, expected).raw_records)
  end

  def test_double
    expected = [
      [{"field" => -1.0}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:double, expected).raw_records)
  end

  def test_binary
    expected = [
      [{"field" => "\xff".b}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:binary, expected).raw_records)
  end

  def test_string
    expected = [
      [{"field" => "Ruby"}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:string, expected).raw_records)
  end

  def test_date32
    expected = [
      [{"field" => Date.new(1960, 1, 1)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:date32, expected).raw_records)
  end

  def test_date64
    expected = [
      [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [nil],
      [{"field" => nil}],
    ]
    assert_equal(expected, build(:date64, expected).raw_records)
  end

  def test_timestamp_second
    expected = [
      [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :timestamp, unit: :second}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_timestamp_milli
    expected = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :timestamp, unit: :milli}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_timestamp_micro
    expected = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :timestamp, unit: :micro}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_timestamp_nano
    expected = [
      [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :timestamp, unit: :nano}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_time32_second
    # 00:10:00
    time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 60 * 10)
    expected = [
      [{"field" => time}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :time32, unit: :second}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_time32_milli
    # 00:10:00.123
    time = Arrow::Time.new(Arrow::TimeUnit::MILLI, (60 * 10) * 1000 + 123)
    expected = [
      [{"field" => time}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :time32, unit: :milli}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_time64_micro
    # 00:10:00.123456
    time = Arrow::Time.new(Arrow::TimeUnit::MICRO,
                           (60 * 10) * 1_000_000 + 123_456)
    expected = [
      [{"field" => time}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :time64, unit: :micro}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_time64_nano
    # 00:10:00.123456789
    time = Arrow::Time.new(Arrow::TimeUnit::NANO,
                           (60 * 10) * 1_000_000_000 + 123_456_789)
    expected = [
      [{"field" => time}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :time64, unit: :nano}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_decimal128
    expected = [
      [{"field" => BigDecimal("92.92")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :decimal128, precision: 8, scale: 2}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_decimal256
    expected = [
      [{"field" => BigDecimal("92.92")}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :decimal256, precision: 38, scale: 2}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_list
    expected = [
      [{"field" => [true, nil, false]}],
      [nil],
      [{"field" => nil}],
    ]
    type = {
      type: :list,
      field: {name: :sub_element, type: :boolean},
    }
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_struct
    expected = [
      [{"field" => {"sub_field" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"sub_field" => nil}}],
    ]
    type = {
      type: :struct,
      fields: [
        {name: :sub_field, type: :boolean},
      ],
    }
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_map
    expected = [
      [{"field" => {"key1" => true, "key2" => nil}}],
      [nil],
      [{"field" => nil}],
    ]
    type = {type: :map, key: :string, item: :boolean}
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    expected = [
      [{"field" => {"field1" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"field2" => nil}}],
    ]
    type = {
      type: :sparse_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    expected = [
      [{"field" => {"field1" => true}}],
      [nil],
      [{"field" => nil}],
      [{"field" => {"field2" => nil}}],
    ]
    type = {
      type: :dense_union,
      fields: [
        {name: :field1, type: :boolean},
        {name: :field2, type: :uint8},
      ],
      type_codes: [0, 1],
    }
    assert_equal(expected, build(type, expected).raw_records)
  end

  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    expected = [
      [{"field" => "Ruby"}],
      [nil],
      [{"field" => nil}],
      [{"field" => "GLib"}],
    ]
    type = {
      type: :dictionary,
      index_data_type: :int8,
      dictionary: Arrow::StringArray.new(["GLib", "Ruby"]),
      ordered: true,
    }
    assert_equal(expected, build(type, expected).raw_records)
  end
end

# Runs the shared struct tests against Arrow::RecordBatch.
class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase
  include RawRecordsStructArrayTests

  def build(type, records)
    Arrow::RecordBatch.new(build_schema(type), records)
  end
end

# Runs the shared struct tests against Arrow::Table.
class RawRecordsTableStructArrayTest < Test::Unit::TestCase
  include RawRecordsStructArrayTests

  def build(type, records)
    Arrow::Table.new(build_schema(type), records)
  end
end

# diff --git a/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb b/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb
# new file mode 100644
# index 000000000..ae90217c2
# --- /dev/null
# +++ b/src/arrow/ruby/red-arrow/test/raw-records/test-table.rb
# @@ -0,0 +1,47 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Checks that a table built from several record batches returns the rows
# of all batches, in order, from raw_records.
class RawRecordsTableTest < Test::Unit::TestCase
  test("2 arrays") do
    schema = [
      {name: :column0, type: :boolean},
      {name: :column1, type: :uint8},
      {name: :column2, type: :string},
    ]
    first_batch_rows = [
      [true, nil, "Ruby"],
      [nil, 0, "GLib"],
      [false, 2 ** 8 - 1, nil],
    ]
    second_batch_rows = [
      [nil, 10, "A"],
      [true, 20, "B"],
      [false, nil, "C"],
      [nil, 40, nil],
    ]
    batches_rows = [first_batch_rows, second_batch_rows]
    # Flatten one level only: join the batches but keep each row an array.
    expected_rows = batches_rows.flatten(1)
    record_batches = batches_rows.map do |rows|
      Arrow::RecordBatch.new(schema, rows)
    end
    table = Arrow::Table.new(schema, record_batches)
    assert_equal(expected_rows, table.raw_records)
  end
end