# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::BinaryDictionaryArrayBuilder and
# Arrow::StringDictionaryArrayBuilder.
#
# Each builder is exercised twice: once starting from an empty
# dictionary and once after pre-loading the dictionary with
# insert_memo_values.
#
# NOTE(review): the class name spells "Dictinary"; it is kept as-is so
# that anything selecting this test case by name keeps working.
class TestDictinaryArrayBuilder < Test::Unit::TestCase
  include Helper::Buildable

  # Shared fixture: a sequence with repeated strings and a single nil.
  def setup
    @values = ["foo", "bar", "foo", nil, "foo", "baz", "bar", "baz", "baz"]
  end

  sub_test_case("BinaryDictionaryArrayBuilder") do
    sub_test_case("constructed from empty") do
      # Feeds @values into a fresh builder and prepares the dictionary
      # array the tests expect it to produce.
      def setup
        super

        @dictionary = %w(foo bar baz)
        @dictionary_array = build_binary_array(@dictionary)
        # A nil value is expected to stay nil in the indices.
        @indices = @values.map {|value| @dictionary.index(value) if value }
        @indices_array = build_int8_array(@indices)
        @data_type =
          Arrow::DictionaryDataType.new(@indices_array.value_data_type,
                                        @dictionary_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(@data_type,
                                                     @indices_array,
                                                     @dictionary_array)

        @builder = Arrow::BinaryDictionaryArrayBuilder.new
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_value_bytes(value)
          end
        end
      end

      test("append_value") do
        # A value not seen before is expected as a new dictionary entry.
        expected_dictionary = build_binary_array(@dictionary + ["qux"])
        expected_indices = build_int8_array(@indices + [3])
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)

        @builder.append_value("qux")

        assert_equal(expected, @builder.finish)
      end

      test("append_value_bytes") do
        expected_dictionary = build_binary_array(@dictionary + ["qux"])
        expected_indices = build_int8_array(@indices + [3])
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)

        @builder.append_value_bytes("qux")

        assert_equal(expected, @builder.finish)
      end

      test("append_array") do
        # Mix of a new value, known values and a null in one array.
        expected_dictionary = build_binary_array(@dictionary + ["qux"])
        expected_indices = build_int8_array(@indices + [3, 0, nil, 2])
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)
        appended = build_binary_array(["qux", "foo", nil, "baz"])

        @builder.append_array(appended)

        assert_equal(expected, @builder.finish)
      end

      test("append_indices") do
        # "qux" is inserted first so that index 3 refers to an existing
        # dictionary entry.
        @builder.insert_memo_values(build_binary_array(["qux"]))
        raw_indices = [1, 2, 1, 3, 0]
        expected_dictionary = build_binary_array(@dictionary + ["qux"])
        # The false flag in the first call is expected to yield a nil.
        expected_indices =
          build_int8_array(@indices + [1, 2, nil, 3, 0] + raw_indices)
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)

        @builder.append_indices(raw_indices,
                                [true, true, false, true, true])
        @builder.append_indices(raw_indices)

        assert_equal(expected, @builder.finish)
      end

      test("append_nulls") do
        # Uses its own builder: only nulls, so the dictionary stays empty.
        empty_dictionary = build_binary_array([])
        null_indices = build_int8_array([nil, nil, nil])
        data_type =
          Arrow::DictionaryDataType.new(null_indices.value_data_type,
                                        empty_dictionary.value_data_type,
                                        false)
        expected = Arrow::DictionaryArray.new(data_type,
                                              null_indices,
                                              empty_dictionary)

        builder = Arrow::BinaryDictionaryArrayBuilder.new
        builder.append_nulls(3)

        assert_equal(expected, builder.finish)
      end

      test("append_empty_values") do
        # One real value followed by 1 + 2 empty values; all four
        # indices are expected to be 0.
        hello_dictionary = build_binary_array(["hello"])
        zero_indices = build_int8_array([0, 0, 0, 0])
        data_type =
          Arrow::DictionaryDataType.new(zero_indices.value_data_type,
                                        hello_dictionary.value_data_type,
                                        false)
        expected = Arrow::DictionaryArray.new(data_type,
                                              zero_indices,
                                              hello_dictionary)

        builder = Arrow::BinaryDictionaryArrayBuilder.new
        builder.append_value("hello")
        builder.append_empty_value
        builder.append_empty_values(2)

        assert_equal(expected, builder.finish)
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end

      test("finish_delta") do
        expected = [
          true,
          @indices_array,
          @dictionary_array,
        ]
        assert_equal(expected, @builder.finish_delta)
      end

      test("reset") do
        # Expectation: indices are dropped, the dictionary is kept.
        expected = Arrow::DictionaryArray.new(@data_type,
                                              build_int8_array([]),
                                              @dictionary_array)

        @builder.reset

        assert_equal({
                       dictionary_length: @dictionary.length,
                       array: expected,
                     },
                     {
                       dictionary_length: @builder.dictionary_length,
                       array: @builder.finish,
                     })
      end

      test("reset_full") do
        # Expectation: both indices and dictionary are dropped.
        expected = Arrow::DictionaryArray.new(@data_type,
                                              build_int8_array([]),
                                              build_binary_array([]))

        @builder.reset_full

        assert_equal({
                       dictionary_length: 0,
                       array: expected,
                     },
                     {
                       dictionary_length: @builder.dictionary_length,
                       array: @builder.finish,
                     })
      end
    end

    sub_test_case("constructed with memo values") do
      # The dictionary is pre-loaded (including the unused "qux") before
      # any value is appended.
      def setup
        super

        @dictionary = %w(qux foo bar baz)
        memo_array = build_binary_array(@dictionary)
        expected_indices = @values.map do |value|
          @dictionary.index(value) if value
        end
        expected_indices_array = build_int8_array(expected_indices)
        data_type =
          Arrow::DictionaryDataType.new(expected_indices_array.value_data_type,
                                        memo_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(data_type,
                                                     expected_indices_array,
                                                     memo_array)

        @builder = Arrow::BinaryDictionaryArrayBuilder.new
        @builder.insert_memo_values(memo_array)
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_value_bytes(value)
          end
        end
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end
    end
  end

  sub_test_case("StringDictionaryArrayBuilder") do
    sub_test_case("constructed from empty") do
      # Feeds @values into a fresh builder and prepares the dictionary
      # array the tests expect it to produce.
      def setup
        super

        @dictionary = %w(foo bar baz)
        @dictionary_array = build_string_array(@dictionary)
        # A nil value is expected to stay nil in the indices.
        @indices = @values.map {|value| @dictionary.index(value) if value }
        @indices_array = build_int8_array(@indices)
        @data_type =
          Arrow::DictionaryDataType.new(@indices_array.value_data_type,
                                        @dictionary_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(@data_type,
                                                     @indices_array,
                                                     @dictionary_array)

        @builder = Arrow::StringDictionaryArrayBuilder.new
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_string(value)
          end
        end
      end

      test("append_string") do
        # A string not seen before is expected as a new dictionary entry.
        expected_dictionary = build_string_array(@dictionary + ["qux"])
        expected_indices = build_int8_array(@indices + [3])
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)

        @builder.append_string("qux")

        assert_equal(expected, @builder.finish)
      end

      test("append_array") do
        # Mix of a new value, known values and a null in one array.
        expected_dictionary = build_string_array(@dictionary + ["qux"])
        expected_indices = build_int8_array(@indices + [3, 0, nil, 2])
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)
        appended = build_string_array(["qux", "foo", nil, "baz"])

        @builder.append_array(appended)

        assert_equal(expected, @builder.finish)
      end

      test("append_indices") do
        # "qux" is inserted first so that index 3 refers to an existing
        # dictionary entry.
        @builder.insert_memo_values(build_string_array(["qux"]))
        raw_indices = [1, 2, 1, 3, 0]
        expected_dictionary = build_string_array(@dictionary + ["qux"])
        # The false flag in the first call is expected to yield a nil.
        expected_indices =
          build_int8_array(@indices + [1, 2, nil, 3, 0] + raw_indices)
        expected = Arrow::DictionaryArray.new(@data_type,
                                              expected_indices,
                                              expected_dictionary)

        @builder.append_indices(raw_indices,
                                [true, true, false, true, true])
        @builder.append_indices(raw_indices)

        assert_equal(expected, @builder.finish)
      end

      test("append_nulls") do
        # Uses its own builder: only nulls, so the dictionary stays empty.
        empty_dictionary = build_string_array([])
        null_indices = build_int8_array([nil, nil, nil])
        data_type =
          Arrow::DictionaryDataType.new(null_indices.value_data_type,
                                        empty_dictionary.value_data_type,
                                        false)
        expected = Arrow::DictionaryArray.new(data_type,
                                              null_indices,
                                              empty_dictionary)

        builder = Arrow::StringDictionaryArrayBuilder.new
        builder.append_nulls(3)

        assert_equal(expected, builder.finish)
      end

      test("append_empty_values") do
        # One real string followed by 1 + 2 empty values; all four
        # indices are expected to be 0.
        hello_dictionary = build_string_array(["hello"])
        zero_indices = build_int8_array([0, 0, 0, 0])
        data_type =
          Arrow::DictionaryDataType.new(zero_indices.value_data_type,
                                        hello_dictionary.value_data_type,
                                        false)
        expected = Arrow::DictionaryArray.new(data_type,
                                              zero_indices,
                                              hello_dictionary)

        builder = Arrow::StringDictionaryArrayBuilder.new
        builder.append_string("hello")
        builder.append_empty_value
        builder.append_empty_values(2)

        assert_equal(expected, builder.finish)
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end

      test("finish_delta") do
        expected = [
          true,
          @indices_array,
          @dictionary_array,
        ]
        assert_equal(expected, @builder.finish_delta)
      end

      test("reset") do
        # Expectation: indices are dropped, the dictionary is kept.
        expected = Arrow::DictionaryArray.new(@data_type,
                                              build_int8_array([]),
                                              @dictionary_array)

        @builder.reset

        assert_equal({
                       dictionary_length: @dictionary.length,
                       array: expected,
                     },
                     {
                       dictionary_length: @builder.dictionary_length,
                       array: @builder.finish,
                     })
      end

      test("reset_full") do
        # Expectation: both indices and dictionary are dropped.
        expected = Arrow::DictionaryArray.new(@data_type,
                                              build_int8_array([]),
                                              build_string_array([]))

        @builder.reset_full

        assert_equal({
                       dictionary_length: 0,
                       array: expected,
                     },
                     {
                       dictionary_length: @builder.dictionary_length,
                       array: @builder.finish,
                     })
      end
    end

    sub_test_case("constructed with memo values") do
      # The dictionary is pre-loaded (including the unused "qux") before
      # any value is appended.
      def setup
        super

        @dictionary = %w(qux foo bar baz)
        memo_array = build_string_array(@dictionary)
        expected_indices = @values.map do |value|
          @dictionary.index(value) if value
        end
        expected_indices_array = build_int8_array(expected_indices)
        data_type =
          Arrow::DictionaryDataType.new(expected_indices_array.value_data_type,
                                        memo_array.value_data_type,
                                        false)
        @expected_array = Arrow::DictionaryArray.new(data_type,
                                                     expected_indices_array,
                                                     memo_array)

        @builder = Arrow::StringDictionaryArrayBuilder.new
        @builder.insert_memo_values(memo_array)
        @values.each do |value|
          if value.nil?
            @builder.append_null
          else
            @builder.append_string(value)
          end
        end
      end

      test("dictionary_length") do
        assert_equal(@dictionary.length, @builder.dictionary_length)
      end

      test("finish") do
        assert_equal(@expected_array, @builder.finish)
      end
    end
  end
end